From 2b8671fba63ed5e4d6266c7a6c1a7b89ed914341 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9ophane=20Loloum?= Date: Mon, 8 Jul 2024 10:14:42 +0200 Subject: [PATCH 01/39] oacc : d_vect + i_vect + d_csr --- openacc/Makefile | 48 ++ openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 61 ++ openacc/psb_d_oacc_csr_mat_mod.F90 | 185 +++++ openacc/psb_d_oacc_vect_mod.F90 | 966 ++++++++++++++++++++++++ openacc/psb_i_oacc_vect_mod.F90 | 455 +++++++++++ openacc/psb_oacc_mod.F90 | 7 + test/openacc/Makefile | 53 ++ test/openacc/test.F90 | 617 +++++++++++++++ test/openacc/timers.c | 97 +++ test/openacc/vectoacc.F90 | 85 +++ 10 files changed, 2574 insertions(+) create mode 100644 openacc/Makefile create mode 100644 openacc/impl/psb_d_oacc_csr_vect_mv.F90 create mode 100644 openacc/psb_d_oacc_csr_mat_mod.F90 create mode 100644 openacc/psb_d_oacc_vect_mod.F90 create mode 100644 openacc/psb_i_oacc_vect_mod.F90 create mode 100644 openacc/psb_oacc_mod.F90 create mode 100644 test/openacc/Makefile create mode 100644 test/openacc/test.F90 create mode 100644 test/openacc/timers.c create mode 100644 test/openacc/vectoacc.F90 diff --git a/openacc/Makefile b/openacc/Makefile new file mode 100644 index 00000000..3590249c --- /dev/null +++ b/openacc/Makefile @@ -0,0 +1,48 @@ +.SUFFIXES: +.SUFFIXES: .F90 .f90 .o .s .c + +# Compilers and flags +CC=mpicc +FC=mpif90 +FCOPT=-O0 -march=native +OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" + +# Directories +LIBDIR=../lib +INCDIR=../include +MODDIR=../modules +IMPLDIR=./impl # Adding the impl directory + +# Include and library paths +INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR) +LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis + +# Source files +FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ + psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ + impl/psb_d_oacc_csr_vect_mv.o + +# Library name +LIBNAME=libpsb_openacc.a + +# Rules +all: $(LIBNAME) + +$(LIBNAME): $(FOBJS) + ar cr $(LIBNAME) $(FOBJS) + /bin/cp -p $(LIBNAME) $(LIBDIR) + +clean: + /bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o + +.f90.o: + $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@ + +.c.o: + $(CC) -c $< -o $@ + +.F90.o: + $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@ + +.F90.s: + $(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@ diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 new file mode 100644 index 00000000..f0394591 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -0,0 +1,61 @@ +subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_vect_mv + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + + if ((n /= size(x%v)) .or. (n /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + +contains + + subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + +end subroutine psb_d_oacc_csr_vect_mv diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..c6b7e3d0 --- /dev/null +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -0,0 +1,185 @@ +module psb_d_oacc_csr_mat_mod + + use iso_c_binding + use psb_d_mat_mod + use psb_d_oacc_vect_mod + use oaccsparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, pass(a) :: all => d_oacc_csr_all + procedure, pass(a) :: is_host => d_oacc_csr_is_host + procedure, pass(a) :: is_sync => d_oacc_csr_is_sync + procedure, pass(a) :: is_dev => d_oacc_csr_is_dev + procedure, pass(a) :: set_host => d_oacc_csr_set_host + procedure, pass(a) :: set_sync => d_oacc_csr_set_sync + procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: sync_space => d_oacc_csr_sync_space + procedure, pass(a) :: sync => d_oacc_csr_sync + procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + end type psb_d_oacc_csr_sparse_mat + + interface + subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_vect_mv + end interface + +contains + + subroutine d_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_d_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(a%val)) then + !$acc exit data delete(a%val) finalize + deallocate(a%val, stat=info) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) finalize + deallocate(a%ja, stat=info) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) finalize + deallocate(a%irp, stat=info) + end if + + call a%set_nrows(m) + call a%set_ncols(n) + + allocate(a%val(nz),stat=info) + allocate(a%ja(nz),stat=info) + allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_space() + end subroutine d_oacc_csr_all + + function d_oacc_csr_is_host(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_csr_is_host + + function d_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_csr_is_sync + + function d_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_csr_is_dev + + subroutine d_oacc_csr_set_host(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_csr_set_host + + subroutine d_oacc_csr_set_sync(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_csr_set_sync + + subroutine d_oacc_csr_set_dev(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_csr_set_dev + + subroutine d_oacc_csr_sync_space(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irp)) then + call i_oacc_create_dev(a%irp) + end if + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine d_oacc_csr_sync_space + + subroutine d_oacc_csr_sync(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_csr_to_host(a%val) + call i_oacc_csr_to_host(a%ja) + call i_oacc_csr_to_host(a%irp) + else if (a%is_host()) then + call d_oacc_csr_to_dev(a%val) + call i_oacc_csr_to_dev(a%ja) + call i_oacc_csr_to_dev(a%irp) + end if + call tmpa%set_sync() + end subroutine d_oacc_csr_sync + + subroutine d_oacc_csr_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_csr_to_dev + + subroutine d_oacc_csr_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_csr_to_host + + subroutine i_oacc_csr_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_csr_to_dev + + subroutine i_oacc_csr_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_csr_to_host + + +end module psb_d_oacc_csr_mat_mod + diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 new file mode 100644 index 00000000..ac5428f3 --- /dev/null +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -0,0 +1,966 @@ +module psb_d_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_d_vect_mod + use psb_i_oacc_vect_mod + use psb_i_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => d_oacc_get_nrows + procedure, nopass :: get_fmt => d_oacc_get_fmt + + procedure, pass(x) :: all => d_oacc_vect_all + procedure, pass(x) :: zero => d_oacc_zero + procedure, pass(x) :: asb_m => d_oacc_asb_m + procedure, pass(x) :: sync => d_oacc_sync + procedure, pass(x) :: sync_space => d_oacc_sync_space + procedure, pass(x) :: bld_x => d_oacc_bld_x + procedure, pass(x) :: bld_mn => d_oacc_bld_mn + procedure, pass(x) :: free => d_oacc_vect_free + procedure, pass(x) :: ins_a => d_oacc_ins_a + procedure, pass(x) :: ins_v => d_oacc_ins_v + procedure, pass(x) :: is_host => d_oacc_is_host + procedure, pass(x) :: is_dev => d_oacc_is_dev + procedure, pass(x) :: is_sync => d_oacc_is_sync + procedure, pass(x) :: set_host => d_oacc_set_host + procedure, pass(x) :: set_dev => d_oacc_set_dev + procedure, pass(x) :: set_sync => d_oacc_set_sync + procedure, pass(x) :: set_scal => d_oacc_set_scal + + procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf + procedure, pass(y) :: sctb => d_oacc_sctb + procedure, pass(y) :: sctb_x => d_oacc_sctb_x + procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf + + procedure, pass(x) :: get_size => oacc_get_size + procedure, pass(x) :: dot_v => d_oacc_vect_dot + procedure, pass(x) :: dot_a => d_oacc_dot_a + procedure, pass(y) :: axpby_v => d_oacc_axpby_v + procedure, pass(y) :: axpby_a => d_oacc_axpby_a + procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz + procedure, pass(y) :: mlt_v => d_oacc_mlt_v + procedure, pass(y) :: mlt_a => d_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 + procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 + procedure, pass(x) :: scal => d_oacc_scal + procedure, pass(x) :: nrm2 => d_oacc_nrm2 + procedure, pass(x) :: amax => d_oacc_amax + procedure, pass(x) :: asum => d_oacc_asum + procedure, pass(x) :: absval1 => d_oacc_absval1 + procedure, pass(x) :: absval2 => d_oacc_absval2 + + end type psb_d_vect_oacc + + real(psb_dpk_), allocatable :: v1(:),v2(:),p(:) + +contains + + subroutine d_oacc_absval1(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n, i + + if (x%is_host()) call x%sync_space() + n = size(x%v) + !$acc parallel loop + do i = 1, n + x%v(i) = abs(x%v(i)) + end do + call x%set_dev() + end subroutine d_oacc_absval1 + + subroutine d_oacc_absval2(x, y) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + !$acc parallel loop + do i = 1, n + yy%v(i) = abs(x%v(i)) + end do + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_oacc_absval2 + + + + subroutine d_oacc_scal(alpha, x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: alpha + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, size(x%v) + x%v(i) = alpha * x%v(i) + end do + call x%set_dev() + end subroutine d_oacc_scal + + function d_oacc_nrm2(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + x%v(i) * x%v(i) + end do + res = sqrt(sum) + end function d_oacc_nrm2 + + function d_oacc_amax(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (x%v(i) > max_val) max_val = x%v(i) + end do + res = max_val + end function d_oacc_amax + + function d_oacc_asum(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i)) + end do + res = sum + end function d_oacc_asum + + + subroutine d_oacc_mlt_v(x, y, info) + use psi_serial_mod + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + type is (psb_d_base_vect_type) + if (y%is_dev()) call y%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select + end subroutine d_oacc_mlt_v + + subroutine d_oacc_mlt_a(x, y, info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync_space() + !$acc parallel loop + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine d_oacc_mlt_a + + subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) + use psi_serial_mod + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync_space() + !$acc parallel loop + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine d_oacc_mlt_a_2 + + subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psi_serial_mod + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + + info = 0 + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) + class is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync_space() + if (yy%is_host()) call yy%sync_space() + if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync_space() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_host() + end select + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) + end do + call z%set_host() + end select + end subroutine d_oacc_mlt_v_2 + + + subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_oacc) + if ((beta /= dzero) .and. y%is_host()) call y%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * xx%v(i) + beta * y%v(i) + end do + end if + call y%set_dev() + class default + if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + end subroutine d_oacc_axpby_v + + subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_oacc), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine d_oacc_axpby_a + + subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. + + select type(xx => x) + class is (psb_d_vect_oacc) + select type(yy => y) + class is (psb_d_vect_oacc) + select type(zz => z) + class is (psb_d_vect_oacc) + if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine d_oacc_abgdxyz + + + subroutine d_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine d_oacc_sctb_buf + + subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine d_oacc_sctb_x + + + + subroutine d_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine d_oacc_sctb + + + subroutine d_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine d_oacc_gthzbuf + + subroutine d_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + real(psb_dpk_) :: y(:) + class(psb_d_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine d_oacc_gthzv_x + + subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space(info) + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space(info) + end select + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine d_oacc_ins_v + + + + subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_ins_a + + + + subroutine d_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_bld_mn + + + subroutine d_oacc_bld_x(x, this) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_bld_x + + + subroutine d_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_d_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine d_oacc_asb_m + + + + subroutine d_oacc_set_scal(x, val, first, last) + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine d_oacc_set_scal + + + + subroutine d_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(dzero) + end subroutine d_oacc_zero + + function d_oacc_get_nrows(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function d_oacc_get_nrows + + function d_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "dOACC" + + end function d_oacc_get_fmt + + function d_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + res = dzero + + select type(yy => y) + type is (psb_d_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n, x%v, 1, yy%v, 1) + type is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + + !$acc parallel loop reduction(+:res) present(x%v, yy%v) + do i = 1, n + res = res + x%v(i) * yy%v(i) + end do + !$acc end parallel loop + + class default + call x%sync() + res = y%dot(n, x%v) + end select + + end function d_oacc_vect_dot + + + + + function d_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n, y, 1, x%v, 1) + + end function d_oacc_dot_a + + ! subroutine d_oacc_set_vect(x,y) + ! implicit none + ! class(psb_d_vect_oacc), intent(inout) :: x + ! real(psb_dpk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine d_oacc_set_vect + + subroutine d_oacc_to_dev(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc update device(v) + end subroutine d_oacc_to_dev + + subroutine d_oacc_to_host(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc update self(v) + end subroutine d_oacc_to_host + + subroutine d_oacc_sync_space(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call d_oacc_create_dev(x%v) + end if + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + end subroutine d_oacc_sync_space + + subroutine d_oacc_sync(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call d_oacc_to_host(x%v) + end if + if (x%is_host()) then + call d_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine d_oacc_sync + + subroutine d_oacc_set_host(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine d_oacc_set_host + + subroutine d_oacc_set_dev(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine d_oacc_set_dev + + subroutine d_oacc_set_sync(x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine d_oacc_set_sync + + function d_oacc_is_dev(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function d_oacc_is_dev + + function d_oacc_is_host(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function d_oacc_is_host + + function d_oacc_is_sync(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function d_oacc_is_sync + + subroutine d_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine d_oacc_vect_all + + + subroutine d_oacc_vect_free(x, info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine d_oacc_vect_free + + function oacc_get_size(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function oacc_get_size + + + subroutine initialize(N) + integer(psb_ipk_) :: N + integer(psb_ipk_) :: i + allocate(v1(N),v2(N),p(N)) + !$acc enter data create(v1,v2,p) + !$acc parallel + !$acc loop + do i=1,n + v1(i) = i + v2(i) = n+i + end do + !$acc end parallel + end subroutine initialize + subroutine finalize_dev() + !$acc exit data delete(v1,v2,p) + end subroutine finalize_dev + subroutine finalize_host() + deallocate(v1,v2,p) + end subroutine finalize_host + subroutine to_dev() + !$acc update device(v1,v2) + end subroutine to_dev + subroutine to_host() + !$acc update self(v1,v2) + end subroutine to_host + function d_dot(N) result(res) + real(kind(1.d0)) :: res + integer(psb_ipk_) :: i,N + real(kind(1.d0)) :: t1,t2,t3 + res = 0.0d0 + !$acc parallel + !$acc loop reduction(+:res) + do i=1,N + res = res + v1(i) * v2(i) + end do + !$acc end parallel + + end function d_dot + function h_dot(N) result(res) + integer(psb_ipk_) :: i,N + real(kind(1.d0)) :: t1,t2,t3,res + res = 0.0d0 + do i=1,N + res = res + v1(i) * v2(i) + end do + end function h_dot + +end module psb_d_oacc_vect_mod \ No newline at end of file diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 new file mode 100644 index 00000000..70fc325e --- /dev/null +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -0,0 +1,455 @@ +module psb_i_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc + integer :: state = is_host + contains + procedure, pass(x) :: get_nrows => i_oacc_get_nrows + procedure, nopass :: get_fmt => i_oacc_get_fmt + + procedure, pass(x) :: all => i_oacc_all + procedure, pass(x) :: zero => i_oacc_zero + procedure, pass(x) :: asb_m => i_oacc_asb_m + procedure, pass(x) :: sync => i_oacc_sync + procedure, pass(x) :: sync_space => i_oacc_sync_space + procedure, pass(x) :: bld_x => i_oacc_bld_x + procedure, pass(x) :: bld_mn => i_oacc_bld_mn + procedure, pass(x) :: free => i_oacc_free + procedure, pass(x) :: ins_a => i_oacc_ins_a + procedure, pass(x) :: ins_v => i_oacc_ins_v + procedure, pass(x) :: is_host => i_oacc_is_host + procedure, pass(x) :: is_dev => i_oacc_is_dev + procedure, pass(x) :: is_sync => i_oacc_is_sync + procedure, pass(x) :: set_host => i_oacc_set_host + procedure, pass(x) :: set_dev => i_oacc_set_dev + procedure, pass(x) :: set_sync => i_oacc_set_sync + procedure, pass(x) :: set_scal => i_oacc_set_scal + procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x + procedure, pass(y) :: sctb => i_oacc_sctb + procedure, pass(y) :: sctb_x => i_oacc_sctb_x + procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf + procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf + + final :: i_oacc_vect_finalize + end type psb_i_vect_oacc + + public :: psb_i_vect_oacc_ + private :: constructor + interface psb_i_vect_oacc_ + module procedure constructor + end interface psb_i_vect_oacc_ + +contains + + function constructor(x) result(this) + integer(psb_ipk_) :: x(:) + type(psb_i_vect_oacc) :: this + integer(psb_ipk_) :: info + + this%v = x + call this%asb(size(x), info) + end function constructor + + + subroutine i_oacc_gthzv_x(i, n, idx, x, y) + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: y(:) + class(psb_i_vect_oacc) :: x + integer :: info + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine i_oacc_gthzv_x + + subroutine i_oacc_gthzbuf(i, n, idx, x) + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_oacc) :: x + integer :: info + + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine i_oacc_gthzbuf + + subroutine i_oacc_sctb(n, idx, x, beta, y) + implicit none + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + integer :: i + + if (n == 0) return + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + end subroutine i_oacc_sctb + + subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer :: info + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space(info) + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + x(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + end select + end subroutine i_oacc_sctb_x + + subroutine i_oacc_sctb_buf(i, n, idx, beta, y) + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%v)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%v(i) + end do + end subroutine i_oacc_sctb_buf + + subroutine i_oacc_set_host(x) + class(psb_i_vect_oacc), intent(inout) :: x + x%state = is_host + end subroutine i_oacc_set_host + + subroutine i_oacc_set_sync(x) + class(psb_i_vect_oacc), intent(inout) :: x + x%state = is_sync + end subroutine i_oacc_set_sync + + subroutine i_oacc_set_dev(x) + class(psb_i_vect_oacc), intent(inout) :: x + x%state = is_dev + end subroutine i_oacc_set_dev + + subroutine i_oacc_set_scal(x, val, first, last) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + + first_ = 1 + last_ = size(x%v) + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(size(x%v), last) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + call x%set_dev() + end subroutine i_oacc_set_scal + + function i_oacc_is_host(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_oacc_is_host + + function i_oacc_is_dev(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_oacc_is_dev + + function i_oacc_is_sync(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_oacc_is_sync + + subroutine i_oacc_free(x, info) + use psb_error_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(x%v)) deallocate(x%v, stat=info) + if (info /= 0) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, 'i_oacc_free') + end if + call x%set_sync() + end subroutine i_oacc_free + + subroutine i_oacc_ins_a(n, irl, val, dupl, x, info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync() + call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + end subroutine i_oacc_ins_a + + subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + class is (psb_i_vect_oacc) + select type(vval => val) + class is (psb_i_vect_oacc) + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() + ! Add the OpenACC kernel call here if needed + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + if (irl%is_dev()) call irl%sync() + if (val%is_dev()) call val%sync() + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info,'i_oacc_ins_v') + return + end if + end subroutine i_oacc_ins_v + + subroutine i_oacc_bld_x(x, this) + use psb_error_mod + implicit none + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_bld_x', i_err = (/size(this), izero, izero, izero, izero/)) + end if + x%v(:) = this(:) + call x%set_host() + call x%sync() + end subroutine i_oacc_bld_x + + subroutine i_oacc_bld_mn(x, n) + use psb_error_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'i_oacc_bld_mn', i_err = (/n, n, n, n, n/)) + end if + end subroutine i_oacc_bld_mn + + subroutine i_oacc_sync(x) + use psb_error_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n, info + + info = 0 + if (x%is_host()) then + n = size(x%v) + if (.not.allocated(x%v)) then + write(*, *) 'Incoherent situation : x%v not allocated' + call psb_realloc(n, x%v, info) + end if + if ((n > size(x%v)) .or. (n > x%get_nrows())) then + write(*, *) 'Incoherent situation : sizes', n, size(x%v), x%get_nrows() + call psb_realloc(n, x%v, info) + end if + !$acc update device(x%v) + else if (x%is_dev()) then + n = size(x%v) + if (.not.allocated(x%v)) then + write(*, *) 'Incoherent situation : x%v not allocated' + call psb_realloc(n, x%v, info) + end if + !$acc update self(x%v) + end if + if (info == 0) call x%set_sync() + if (info /= 0) then + info = psb_err_internal_error_ + call psb_errpush(info, 'i_oacc_sync') + end if + end subroutine i_oacc_sync + + subroutine i_oacc_sync_space(x, info) + use psb_error_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nh, nd + + info = 0 + if (x%is_dev()) then + nh = size(x%v) + nd = nh + if (nh < nd) then + call psb_realloc(nd, x%v, info) + end if + else + nh = size(x%v) + nd = nh + if (nh < nd) then + call psb_realloc(nd, x%v, info) + end if + end if + end subroutine i_oacc_sync_space + + function i_oacc_get_nrows(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + res = 0 + if (allocated(x%v)) res = size(x%v) + end function i_oacc_get_nrows + + function i_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'iOACC' + end function i_oacc_get_fmt + + subroutine i_oacc_all(n, x, info) + use psb_error_mod + implicit none + class(psb_i_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) call x%set_host() + if (info == 0) call x%sync_space(info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_all', i_err=(/n, n, n, n, n/)) + end if + end subroutine i_oacc_all + + subroutine i_oacc_zero(x) + use psb_error_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + ! Ensure zeroing on the GPU side + call x%set_dev() + x%v = 0 + !$acc update device(x%v) + end subroutine i_oacc_zero + + subroutine i_oacc_asb_m(n, x, info) + use psb_error_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nh, nd + + info = 0 + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + else + nh = size(x%v) + if (nh < n) then + call x%psb_i_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space(info) + call x%set_host() + end if + end if + end subroutine i_oacc_asb_m + + subroutine i_oacc_vect_finalize(x) + use psi_serial_mod + use psb_realloc_mod + implicit none + type(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + info = 0 + call x%free(info) + end subroutine i_oacc_vect_finalize + +end module psb_i_oacc_vect_mod + + + + + + \ No newline at end of file diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90 new file mode 100644 index 00000000..ce5e85f9 --- /dev/null +++ b/openacc/psb_oacc_mod.F90 @@ -0,0 +1,7 @@ +module psb_oacc_mod + use psb_const_mod + + use psb_d_oacc_vect_mod + use psb_d_oacc_csr_mat_mod + +end module psb_oacc_mod \ No newline at end of file diff --git a/test/openacc/Makefile b/test/openacc/Makefile new file mode 100644 index 00000000..6df14d42 --- /dev/null +++ b/test/openacc/Makefile @@ -0,0 +1,53 @@ +TOPDIR=../.. +include $(TOPDIR)/Make.inc + +# Directories +LIBDIR=$(TOPDIR)/lib/ +PSBLIBDIR=$(TOPDIR)/lib/ +PSBINCDIR=$(TOPDIR)/include +PSBMODDIR=$(TOPDIR)/modules +INCDIR=$(TOPDIR)/include +MODDIR=$(TOPDIR)/modules +EXEDIR=./runs + +# Libraries +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lpsb_openacc -lopenblas -lmetis +LDLIBS=$(PSBGPULDLIBS) + +# Includes +FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) + +# Compiler flags +FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70" +CFLAGS=-O0 -march=native + +# Source files +SRCS=test.F90 vectoacc.F90 +CSRC=timers.c + +# Object files +OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o) + +# Default rule +all: dir + +dir: + @if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi + +# Pattern rule for creating executables +%: %.o timers.o + $(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) + /bin/mv $@ $(EXEDIR) + +# Compilation rules +%.o: %.F90 + $(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@ + +%.o: %.c + $(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@ + +clean: + /bin/rm -fr *.o *.mod $(EXEDIR)/* + +# Phony targets +.PHONY: all dir clean diff --git a/test/openacc/test.F90 b/test/openacc/test.F90 new file mode 100644 index 00000000..0d0b756f --- /dev/null +++ b/test/openacc/test.F90 @@ -0,0 +1,617 @@ +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + + contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=done/sqrt((3*done)) + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=done/sqrt((3*done)) + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=done/sqrt((3*done)) + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv,tnd) + use psb_base_mod + use psb_util_mod + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + logical, optional :: tnd + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) + real(psb_dpk_), allocatable :: val(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + logical :: tnd_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+2) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 + call mpi_dims_create(np,3,npdims,info) + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& + & dupl=psb_dupl_err_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + ! we build an auxiliary matrix consisting of one row at a + ! time; just a small matrix. might be extended to generate + ! a bunch of rows per call. + ! + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + goto 9999 + endif + + + ! loop over rows belonging to current process in a block + ! distribution. + + call psb_barrier(ctxt) + t1 = psb_wtime() + do ii=1, nlr,nb + ib = min(nb,nlr-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) exit + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) exit + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) exit + end do + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + deallocate(val,irow,icol) + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) + else + call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + + +end module psb_d_pde3d_mod + + + +program test + use psb_base_mod + use psb_ext_mod + use psb_oacc_mod + use psb_d_pde3d_mod + + implicit none + integer(psb_ipk_) :: n, i, info, m, nrm, nz + integer(psb_ipk_), parameter :: ntests=80, ngpu=20 + real(psb_dpk_) :: dot_dev, dot_host + type(psb_d_vect_oacc) :: tx, ty + type(psb_d_oacc_csr_sparse_mat) :: aacsr + real(psb_dpk_) :: t0, t1, t2, t3, csflp, elflp + double precision, external :: etime + + type(psb_dspmat_type) :: a + type(psb_desc_type) :: desc_a + type(psb_d_vect_type) :: xxv, bv + type(psb_d_csr_sparse_mat) :: acsr + character(len=5) :: afmt='csr' + real(psb_dpk_), allocatable :: vv(:), ydev(:), yhost(:) + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np, nth, idim + integer(psb_epk_) :: neq + + call psb_init(ctxt) + call psb_info(ctxt, iam, np) + + write(*,*) 'Enter size :' + read(*,*) idim + idim = max(1, idim) + + n = idim**3 + call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info) + call a%cp_to(acsr) + m = acsr%get_nrows() + n = acsr%get_ncols() + nz = acsr%get_nzeros() + call aacsr%all(m, n, nz, info) + aacsr%val = (acsr%val) + aacsr%ja = (acsr%ja) + aacsr%irp = (acsr%irp) + call aacsr%set_host() + call aacsr%sync() + + call initialize(n) + + call to_host() + t2 = etime() + do i = 1, ntests + dot_host = h_dot(n) + end do + t3 = etime() + + call tx%all(n, info) + call ty%all(n, info) + vv = bv%get_vect() + call bv%set_vect(v1) + call tx%set_vect(v1) + call ty%set_vect(v2) + t0 = etime() + do i = 1, ntests * ngpu + dot_dev = tx%dot_v(n, ty) + end do + !$acc wait + t1 = etime() + write(*,*) ' Dot Results : dev:', dot_dev, ' host:', dot_host + write(*,*) ' Timing : dev:', t1 - t0, (t1 - t0) / (ntests * ngpu), & + ' host:', t3 - t2, (t3 - t2) / ntests + + call a%mv_from(acsr) + t2 = etime() + do i = 1, ntests + call a%spmm(done, bv, dzero, xxv, info) + end do + t3 = etime() + yhost = xxv%get_vect() + t0 = etime() + do i = 1, ntests * ngpu + call aacsr%vect_mv(done, tx, dzero, ty, info) + end do + !$acc wait + t1 = etime() + ydev = ty%get_vect() + write(*,*) 'Correctness check: ', maxval(abs(ydev(:) - yhost(:))) + write(*,*) ' CSR PROD ' + write(*, '(2(a,f12.3,2x))') ' Timing (ms): ' + csflp = 2.d0 * nz / ((t1 - t0) / (ntests * ngpu)) + write(*, '(2(a,f12.3,2x))') ' dev:', 1e3 * (t1 - t0) / (ntests * ngpu), ' :', csflp / 1.d6 + csflp = 2.d0 * nz / ((t3 - t2) / (ntests)) + write(*, '(2(a,f12.3,2x))') ' host:', 1e3 * (t3 - t2) / ntests, ' :', csflp / 1.d6 + write(*,*) 'Done' + + call tx%free(info) + call ty%free(info) + call finalize_dev() + call finalize_host() + call psb_exit(ctxt) +end program test diff --git a/test/openacc/timers.c b/test/openacc/timers.c new file mode 100644 index 00000000..12fa4f56 --- /dev/null +++ b/test/openacc/timers.c @@ -0,0 +1,97 @@ +#include +#include +#include + +double wtime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3; + return(temp); +} + +double timef_() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3; + return(temp); +} + +double timef() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3; + return(temp); +} + +double etime() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + +double etime_() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + +double etimef() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + +double etimef_() +{ + struct timeval tt; + struct timezone tz; + double temp; + if (gettimeofday(&tt,&tz) != 0) { + fprintf(stderr,"Fatal error for gettimeofday ??? \n"); + exit(-1); + } + temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6; + return(temp); +} + + + diff --git a/test/openacc/vectoacc.F90 b/test/openacc/vectoacc.F90 new file mode 100644 index 00000000..b50fa317 --- /dev/null +++ b/test/openacc/vectoacc.F90 @@ -0,0 +1,85 @@ +program vectoacc + use psb_base_mod + use psb_oacc_mod + implicit none + + type(psb_d_vect_oacc) :: v3, v4, v5 + integer(psb_ipk_) :: info, n, i + real(psb_dpk_) :: alpha, beta, result + double precision, external :: etime + + real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev + double precision :: time_start, time_end + integer(psb_ipk_), parameter :: ntests=80, ngpu=20 + + write(*, *) 'Test of the vector operations with OpenACC' + + write(*, *) 'Enter the size of the vectors' + read(*, *) n + alpha = 2.0 + beta = 0.5 + + call v3%all(n, info) + call v4%all(n, info) + call v5%all(n, info) + + do i = 1, n + v3%v(i) = real(i, psb_dpk_) + v4%v(i) = real(n - i, psb_dpk_) + end do + + call v3%set_dev() + call v4%set_dev() + + call v3%scal(alpha) + call v3%sync() + + do i = 1, n + if (v3%v(i) /= alpha * real(i, psb_dpk_)) then + write(*, *) 'Scal error : index', i + end if + end do + write(*, *) 'Scal test passed' + + result = v3%dot_v(n, v4) + call v3%sync() + call v4%sync() + if (result /= sum(v3%v * v4%v)) then + write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result + end if + write(*, *) 'Dot_v test passed' + + result = v3%nrm2(n) + call v3%sync() + if (result /= sqrt(sum(v3%v ** 2))) then + write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result + end if + write(*, *) 'nrm2 test passed' + + call v3%set_host() + call v4%set_host() + + time_start = etime() + do i = 1, ntests + dot_host = sum(v3%v * v4%v) + end do + time_end = etime() + t_host = (time_end - time_start) / real(ntests) + write(*, *) 'Performance host: ', t_host, ' sec' + + call v3%set_dev() + call v4%set_dev() + time_start = etime() + do i = 1, ntests + dot_dev = v3%dot_v(n, v4) + end do + !$acc wait + time_end = etime() + t_dev = (time_end - time_start) / real(ntests) + write(*, *) 'Performance device: ', t_dev, ' sec' + + call v3%free(info) + call v4%free(info) + call v5%free(info) + +end program vectoacc From e3a3e39caf31e8c28b0bf4267a06b6d4e9963d93 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 10:03:30 +0200 Subject: [PATCH 02/39] Modify configure --enable-openacc --with-fcopenacc=..... --- Make.inc.in | 7 +++ Makefile | 11 ++++- config/pac.m4 | 44 ++++++++++++++++++ configure | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 21 +++++++++ 5 files changed, 204 insertions(+), 2 deletions(-) diff --git a/Make.inc.in b/Make.inc.in index 858ad336..38c8ef86 100755 --- a/Make.inc.in +++ b/Make.inc.in @@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@ CBINDLIBNAME=libpsb_cbind.a +OACCD=@OACCD@ +OACCLD=@OACCLD@ +FCOPENACC=@FCOPENACC@ +CCOPENACC=@CCOPENACC@ +CXXOPENACC=@CXXOPENACC@ + CUDAD=@CUDAD@ CUDALD=@CUDALD@ LCUDA=@LCUDA@ @@ -82,6 +88,7 @@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ NVCC=@CUDA_NVCC@ CUDEFINES=@CUDEFINES@ + .SUFFIXES: .cu .cu.o: $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< diff --git a/Makefile b/Makefile index 95f4cb17..972fd3c6 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ include Make.inc -all: dirs based precd kryld utild cbindd extd $(CUDAD) libd +all: dirs based precd kryld utild cbindd extd $(CUDAD) $(OACCD) libd @echo "=====================================" @echo "PSBLAS libraries Compilation Successful." @@ -14,9 +14,10 @@ utild: based kryld: precd extd: based cudad: extd +oaccd: extd cbindd: based precd kryld utild -libd: based precd kryld utild cbindd extd $(CUDALD) +libd: based precd kryld utild cbindd extd $(CUDALD) $(OACCLD) $(MAKE) -C base lib $(MAKE) -C prec lib $(MAKE) -C krylov lib @@ -25,6 +26,8 @@ libd: based precd kryld utild cbindd extd $(CUDALD) $(MAKE) -C ext lib cudald: cudad $(MAKE) -C cuda lib +oaccld: oaccd + $(MAKE) -C openacc lib based: @@ -41,6 +44,8 @@ extd: based $(MAKE) -C ext objs cudad: based extd $(MAKE) -C cuda objs +oaccd: based extd + $(MAKE) -C openacc objs install: all @@ -67,6 +72,7 @@ clean: $(MAKE) -C cbind clean $(MAKE) -C ext clean $(MAKE) -C cuda clean + $(MAKE) -C openacc clean check: all make check -C test/serial @@ -84,6 +90,7 @@ veryclean: cleanlib cd cbind && $(MAKE) veryclean cd ext && $(MAKE) veryclean cd cuda && $(MAKE) veryclean + cd openacc && $(MAKE) veryclean cd test/fileread && $(MAKE) clean cd test/pargen && $(MAKE) clean cd test/util && $(MAKE) clean diff --git a/config/pac.m4 b/config/pac.m4 index 0d22392f..d74a6f24 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2267,3 +2267,47 @@ CPPFLAGS="$SAVE_CPPFLAGS" ])dnl + +dnl @synopsis PAC_ARG_OPENACC +dnl +dnl Test for --enable-openacc +dnl +dnl +dnl +dnl Example use: +dnl +dnl +dnl @author Salvatore Filippone +dnl +AC_DEFUN([PAC_ARG_OPENACC], +[AC_MSG_CHECKING([whether we want openacc ]) +AC_ARG_ENABLE(openacc, +AS_HELP_STRING([--enable-openacc], +[Specify whether to enable openacc. ]), +[ +pac_cv_openacc="yes"; +] +dnl , +dnl [pac_cv_openacc="no";] + ) +if test x"$pac_cv_openacc" == x"yes" ; then + AC_MSG_RESULT([yes.]) +# AC_LANG_PUSH([Fortran]) +# AC_OPENACC() +# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C]) +# AC_OPENACC() +# pac_cv_openacc_ccopt="$OPENACC_CFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C++]) +# AC_OPENACC() +# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS"; +# AC_LANG_POP() +else + pac_cv_openacc="no"; + AC_MSG_RESULT([no.]) +fi +] +) + diff --git a/configure b/configure index b7e5f329..15e3670d 100755 --- a/configure +++ b/configure @@ -667,6 +667,11 @@ CUDA_DIR EXTRALDLIBS SPGPU_LIBS SPGPU_FLAGS +CXXOPENACC +CCOPENACC +FCOPENACC +OACCLD +OACCD METISINCFILE UTILLIBNAME METHDLIBNAME @@ -840,6 +845,10 @@ with_amdincdir with_amdlibdir with_cuda with_cudacc +enable_openacc +with_ccopenacc +with_cxxopenacc +with_fcopenacc ' ac_precious_vars='build_alias host_alias @@ -1490,6 +1499,7 @@ Optional Features: --disable-silent-rules verbose build output (undo: "make V=0") --enable-openmp Specify whether to enable openmp. --disable-openmp do not use OpenMP + --enable-openacc Specify whether to enable openacc. Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1535,6 +1545,12 @@ Optional Packages: --with-cuda=DIR Specify the CUDA install directory. --with-cudacc A comma-separated list of CCs to compile to, for example, --with-cudacc=50,60,70,75 + --with-ccopenacc additional [CCOPENACC] flags to be added: will + prepend to [CCOPENACC] + --with-cxxopenacc additional [CXXOPENACC] flags to be added: will + prepend to [CXXOPENACC] + --with-fcopenacc additional [FCOPENACC] flags to be added: will + prepend to [FCOPENACC] Some influential environment variables: FC Fortran compiler command @@ -10867,6 +10883,106 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ CUDA_LIBS=""; fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5 +printf %s "checking whether we want openacc ... " >&6; } +# Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +pac_cv_openacc="yes"; + + +fi + +if test x"$pac_cv_openacc" == x"yes" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5 +printf "%s\n" "yes." >&6; } +# AC_LANG_PUSH([Fortran]) +# AC_OPENACC() +# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C]) +# AC_OPENACC() +# pac_cv_openacc_ccopt="$OPENACC_CFLAGS"; +# AC_LANG_POP() +# AC_LANG_PUSH([C++]) +# AC_OPENACC() +# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS"; +# AC_LANG_POP() +else + pac_cv_openacc="no"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5 +printf "%s\n" "no." >&6; } +fi + + +if test x"$pac_cv_openacc" == x"yes" ; then + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-ccopenacc was given. +if test ${with_ccopenacc+y} +then : + withval=$with_ccopenacc; +CCOPENACC="${withval} ${CCOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5 +printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; } + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-cxxopenacc was given. +if test ${with_cxxopenacc+y} +then : + withval=$with_cxxopenacc; +CXXOPENACC="${withval} ${CXXOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5 +printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; } + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi + + + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-fcopenacc was given. +if test ${with_fcopenacc+y} +then : + withval=$with_fcopenacc; +FCOPENACC="${withval} ${FCOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5 +printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; } + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi + + + + OACCD=oaccd; + OACCLD=oaccld; + + #FCOPT="$FCOPT $FCOPENACC"; + #CCOPT="$CCOPT $CCOPENACC" + #CXXOPT="$CXXOPT $CXXOPENACC" + #FLINK="$FLINK $FCOPENACC"; +fi @@ -10957,6 +11073,13 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) + + + + + + + diff --git a/configure.ac b/configure.ac index 10f9e2f4..ed4f4acc 100755 --- a/configure.ac +++ b/configure.ac @@ -843,6 +843,20 @@ if test "x$pac_cv_ipk_size" != "x4"; then CUDA_LIBS=""; fi +PAC_ARG_OPENACC() +if test x"$pac_cv_openacc" == x"yes" ; then + PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC) + PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC) + PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC) + + OACCD=oaccd; + OACCLD=oaccld; + + #FCOPT="$FCOPT $FCOPENACC"; + #CCOPT="$CCOPT $CCOPENACC" + #CXXOPT="$CXXOPT $CXXOPENACC" + #FLINK="$FLINK $FCOPENACC"; +fi @@ -927,6 +941,12 @@ AC_SUBST(PRECLIBNAME) AC_SUBST(METHDLIBNAME) AC_SUBST(UTILLIBNAME) AC_SUBST(METISINCFILE) +AC_SUBST(OACCD) +AC_SUBST(OACCLD) +AC_SUBST(FCOPENACC) +AC_SUBST(CCOPENACC) +AC_SUBST(CXXOPENACC) + AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_LIBS) dnl AC_SUBST(SPGPU_DIR) @@ -944,6 +964,7 @@ AC_SUBST(CUDEFINES) AC_SUBST(CUDAD) AC_SUBST(CUDALD) AC_SUBST(LCUDA) + ############################################################################### # the following files will be created by Automake From 93c9df0277619a4a3d8d56d4acfd99cb717a79a4 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 10:03:57 +0200 Subject: [PATCH 03/39] Adjust Makefiles & source --- openacc/Makefile | 61 +- openacc/psb_d_oacc_csr_mat_mod.F90 | 2 +- openacc/psb_d_oacc_vect_mod.F90 | 1702 ++++++++++++++-------------- 3 files changed, 888 insertions(+), 877 deletions(-) diff --git a/openacc/Makefile b/openacc/Makefile index 3590249c..3327610f 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -1,11 +1,10 @@ -.SUFFIXES: -.SUFFIXES: .F90 .f90 .o .s .c +include ../Make.inc # Compilers and flags -CC=mpicc -FC=mpif90 -FCOPT=-O0 -march=native -OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" +#CC=mpicc +#FC=mpif90 +#FCOPT=-O0 -march=native +#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" # Directories LIBDIR=../lib @@ -14,35 +13,49 @@ MODDIR=../modules IMPLDIR=./impl # Adding the impl directory # Include and library paths -INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR) -LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis +INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES= +#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis # Source files FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ - impl/psb_d_oacc_csr_vect_mv.o + psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o # Library name LIBNAME=libpsb_openacc.a -# Rules -all: $(LIBNAME) +OBJS=$(COBJS) $(FOBJS) -$(LIBNAME): $(FOBJS) - ar cr $(LIBNAME) $(FOBJS) +# Rules +lib: objs ilib + ar cur $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) -clean: - /bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) -.f90.o: - $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@ +iobjs: $(OBJS) + $(MAKE) -C impl objs -.c.o: - $(CC) -c $< -o $@ +ilib: $(OBJS) + $(MAKE) -C impl lib -.F90.o: - $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@ +psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ + psb_d_oacc_csr_mat_mod.o +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a +veryclean: clean +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean -.F90.s: - $(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@ +.c.o: + $(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@ diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index c6b7e3d0..d9c8a2a6 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -3,7 +3,7 @@ module psb_d_oacc_csr_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_oacc_vect_mod - use oaccsparse_mod + !use oaccsparse_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index ac5428f3..eda804ce 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -3,9 +3,9 @@ module psb_d_oacc_vect_mod use psb_const_mod use psb_error_mod use psb_d_vect_mod - use psb_i_oacc_vect_mod use psb_i_vect_mod - + use psb_i_oacc_vect_mod + integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 @@ -14,49 +14,49 @@ module psb_d_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => d_oacc_get_nrows - procedure, nopass :: get_fmt => d_oacc_get_fmt - - procedure, pass(x) :: all => d_oacc_vect_all - procedure, pass(x) :: zero => d_oacc_zero - procedure, pass(x) :: asb_m => d_oacc_asb_m +!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows +!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt +!!$ +!!$ procedure, pass(x) :: all => d_oacc_vect_all +!!$ procedure, pass(x) :: zero => d_oacc_zero +!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m procedure, pass(x) :: sync => d_oacc_sync procedure, pass(x) :: sync_space => d_oacc_sync_space - procedure, pass(x) :: bld_x => d_oacc_bld_x - procedure, pass(x) :: bld_mn => d_oacc_bld_mn - procedure, pass(x) :: free => d_oacc_vect_free - procedure, pass(x) :: ins_a => d_oacc_ins_a - procedure, pass(x) :: ins_v => d_oacc_ins_v +!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x +!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn +!!$ procedure, pass(x) :: free => d_oacc_vect_free +!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a +!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v procedure, pass(x) :: is_host => d_oacc_is_host procedure, pass(x) :: is_dev => d_oacc_is_dev procedure, pass(x) :: is_sync => d_oacc_is_sync procedure, pass(x) :: set_host => d_oacc_set_host procedure, pass(x) :: set_dev => d_oacc_set_dev procedure, pass(x) :: set_sync => d_oacc_set_sync - procedure, pass(x) :: set_scal => d_oacc_set_scal - - procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf - procedure, pass(y) :: sctb => d_oacc_sctb - procedure, pass(y) :: sctb_x => d_oacc_sctb_x - procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf - - procedure, pass(x) :: get_size => oacc_get_size - procedure, pass(x) :: dot_v => d_oacc_vect_dot - procedure, pass(x) :: dot_a => d_oacc_dot_a - procedure, pass(y) :: axpby_v => d_oacc_axpby_v - procedure, pass(y) :: axpby_a => d_oacc_axpby_a - procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz - procedure, pass(y) :: mlt_v => d_oacc_mlt_v - procedure, pass(y) :: mlt_a => d_oacc_mlt_a - procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 - procedure, pass(x) :: scal => d_oacc_scal - procedure, pass(x) :: nrm2 => d_oacc_nrm2 - procedure, pass(x) :: amax => d_oacc_amax - procedure, pass(x) :: asum => d_oacc_asum +!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal +!!$ +!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x +!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf +!!$ procedure, pass(y) :: sctb => d_oacc_sctb +!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x +!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf +!!$ +!!$ procedure, pass(x) :: get_size => oacc_get_size +!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot +!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a +!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a +!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz +!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_oacc_scal +!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2 +!!$ procedure, pass(x) :: amax => d_oacc_amax +!!$ procedure, pass(x) :: asum => d_oacc_asum procedure, pass(x) :: absval1 => d_oacc_absval1 - procedure, pass(x) :: absval2 => d_oacc_absval2 +!!$ procedure, pass(x) :: absval2 => d_oacc_absval2 end type psb_d_vect_oacc @@ -78,729 +78,727 @@ contains call x%set_dev() end subroutine d_oacc_absval1 - subroutine d_oacc_absval2(x, y) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_) :: n - integer(psb_ipk_) :: i - - n = min(size(x%v), size(y%v)) - select type (yy => y) - class is (psb_d_vect_oacc) - if (x%is_host()) call x%sync() - if (yy%is_host()) call yy%sync() - !$acc parallel loop - do i = 1, n - yy%v(i) = abs(x%v(i)) - end do - class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() - call x%psb_d_base_vect_type%absval(y) - end select - end subroutine d_oacc_absval2 - - - - subroutine d_oacc_scal(alpha, x) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - real(psb_dpk_), intent(in) :: alpha - integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, size(x%v) - x%v(i) = alpha * x%v(i) - end do - call x%set_dev() - end subroutine d_oacc_scal - - function d_oacc_nrm2(n, x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - integer(psb_ipk_) :: info - real(psb_dpk_) :: sum - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + x%v(i) * x%v(i) - end do - res = sqrt(sum) - end function d_oacc_nrm2 - - function d_oacc_amax(n, x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - integer(psb_ipk_) :: info - real(psb_dpk_) :: max_val - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - max_val = -huge(0.0) - !$acc parallel loop reduction(max:max_val) - do i = 1, n - if (x%v(i) > max_val) max_val = x%v(i) - end do - res = max_val - end function d_oacc_amax - - function d_oacc_asum(n, x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - integer(psb_ipk_) :: info - real(psb_dpk_) :: sum - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i)) - end do - res = sum - end function d_oacc_asum - - - subroutine d_oacc_mlt_v(x, y, info) - use psi_serial_mod - implicit none - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i, n - - info = 0 - n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) - type is (psb_d_base_vect_type) - if (y%is_dev()) call y%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - class default - if (xx%is_dev()) call xx%sync() - if (y%is_dev()) call y%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - end select - end subroutine d_oacc_mlt_v - - subroutine d_oacc_mlt_a(x, y, info) - use psi_serial_mod - implicit none - real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n - - info = 0 - if (y%is_dev()) call y%sync_space() - !$acc parallel loop - do i = 1, size(x) - y%v(i) = y%v(i) * x(i) - end do - call y%set_host() - end subroutine d_oacc_mlt_a - - subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) - use psi_serial_mod - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:) - real(psb_dpk_), intent(in) :: y(:) - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n - - info = 0 - if (z%is_dev()) call z%sync_space() - !$acc parallel loop - do i = 1, size(x) - z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) - end do - call z%set_host() - end subroutine d_oacc_mlt_a_2 - - subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psi_serial_mod - use psb_string_mod - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - character(len=1), intent(in), optional :: conjgx, conjgy - integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ - - conjgx_ = .false. - conjgy_ = .false. - if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') - if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') - - n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) - - info = 0 - select type(xx => x) - class is (psb_d_vect_oacc) - select type (yy => y) - class is (psb_d_vect_oacc) - if (xx%is_host()) call xx%sync_space() - if (yy%is_host()) call yy%sync_space() - if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) - end do - call z%set_dev() - class default - if (xx%is_dev()) call xx%sync_space() - if (yy%is_dev()) call yy%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) - end do - call z%set_host() - end select - class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) - end do - call z%set_host() - end select - end subroutine d_oacc_mlt_v_2 - - - subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) - use psi_serial_mod - implicit none - integer(psb_ipk_), intent(in) :: m - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - real(psb_dpk_), intent(in) :: alpha, beta - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nx, ny, i - - info = psb_success_ - - select type(xx => x) - type is (psb_d_vect_oacc) - if ((beta /= dzero) .and. y%is_host()) call y%sync_space() - if (xx%is_host()) call xx%sync_space() - nx = size(xx%v) - ny = size(y%v) - if ((nx < m) .or. (ny < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * xx%v(i) + beta * y%v(i) - end do - end if - call y%set_dev() - class default - if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() - call y%axpby(m, alpha, x%v, beta, info) - end select - end subroutine d_oacc_axpby_v - - subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) - use psi_serial_mod - implicit none - integer(psb_ipk_), intent(in) :: m - real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_oacc), intent(inout) :: y - real(psb_dpk_), intent(in) :: alpha, beta - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i - - if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * x(i) + beta * y%v(i) - end do - call y%set_host() - end subroutine d_oacc_axpby_a - - subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) - use psi_serial_mod - implicit none - integer(psb_ipk_), intent(in) :: m - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nx, ny, nz, i - logical :: gpu_done - - info = psb_success_ - gpu_done = .false. - - select type(xx => x) - class is (psb_d_vect_oacc) - select type(yy => y) - class is (psb_d_vect_oacc) - select type(zz => z) - class is (psb_d_vect_oacc) - if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() - if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() - if (xx%is_host()) call xx%sync_space() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select - end select - end select - - if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) - end if - end subroutine d_oacc_abgdxyz - - - subroutine d_oacc_sctb_buf(i, n, idx, beta, y) - use psb_base_mod - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - real(psb_dpk_) :: beta - class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info - - if (.not.allocated(y%combuf)) then - call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') - return - end if - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - if (y%is_host()) call y%sync_space() - - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) - end do - - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) - end do - end select - end subroutine d_oacc_sctb_buf - - subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) - use psb_base_mod - implicit none - integer(psb_ipk_):: i, n - class(psb_i_base_vect_type) :: idx - real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info, ni - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - class default - call psb_errpush(info, 'd_oacc_sctb_x') - return - end select - - if (y%is_host()) call y%sync_space() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() - end subroutine d_oacc_sctb_x - - - - subroutine d_oacc_sctb(n, idx, x, beta, y) - use psb_base_mod - implicit none - integer(psb_ipk_) :: n - integer(psb_ipk_) :: idx(:) - real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (n == 0) return - if (y%is_dev()) call y%sync_space() - - !$acc parallel loop - do i = 1, n - y%v(idx(i)) = beta * y%v(idx(i)) + x(i) - end do - - call y%set_host() - end subroutine d_oacc_sctb - - - subroutine d_oacc_gthzbuf(i, n, idx, x) - use psb_base_mod - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - class(psb_d_vect_oacc) :: x - integer(psb_ipk_) :: info - - info = 0 - if (.not.allocated(x%combuf)) then - call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') - return - end if - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - class default - call psb_errpush(info, 'd_oacc_gthzbuf') - return - end select - - if (x%is_host()) call x%sync_space() - - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do - end subroutine d_oacc_gthzbuf - - subroutine d_oacc_gthzv_x(i, n, idx, x, y) - use psb_base_mod - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type):: idx - real(psb_dpk_) :: y(:) - class(psb_d_vect_oacc):: x - integer(psb_ipk_) :: info - - info = 0 - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - class default - call psb_errpush(info, 'd_oacc_gthzv_x') - return - end select - - if (x%is_host()) call x%sync_space() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do - end subroutine d_oacc_gthzv_x - - subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) - use psi_serial_mod - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - class(psb_i_base_vect_type), intent(inout) :: irl - class(psb_d_base_vect_type), intent(inout) :: val - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i, isz - logical :: done_oacc - - info = 0 - if (psb_errstatus_fatal()) return - - done_oacc = .false. - select type(virl => irl) - type is (psb_i_vect_oacc) - select type(vval => val) - type is (psb_d_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, n - x%v(virl%v(i)) = vval%v(i) - end do - call x%set_dev() - done_oacc = .true. - end select - end select - - if (.not.done_oacc) then - select type(virl => irl) - type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) - end select - select type(vval => val) - type is (psb_d_vect_oacc) - if (vval%is_dev()) call vval%sync_space() - end select - call x%ins(n, irl%v, val%v, dupl, info) - end if - - if (info /= 0) then - call psb_errpush(info, 'oacc_vect_ins') - return - end if - - end subroutine d_oacc_ins_v - - - - subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) - use psi_serial_mod - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - integer(psb_ipk_), intent(in) :: irl(:) - real(psb_dpk_), intent(in) :: val(:) - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i - - info = 0 - if (x%is_dev()) call x%sync_space() - call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) - call x%set_host() - !$acc update device(x%v) - - end subroutine d_oacc_ins_a - - - - subroutine d_oacc_bld_mn(x, n) - use psb_base_mod - implicit none - integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call x%all(n, info) - if (info /= 0) then - call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) - end if - call x%set_host() - !$acc update device(x%v) - - end subroutine d_oacc_bld_mn - - - subroutine d_oacc_bld_x(x, this) - use psb_base_mod - implicit none - real(psb_dpk_), intent(in) :: this(:) - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call psb_realloc(size(this), x%v, info) - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'd_oacc_bld_x', & - i_err=(/size(this), izero, izero, izero, izero/)) - return - end if - - x%v(:) = this(:) - call x%set_host() - !$acc update device(x%v) - - end subroutine d_oacc_bld_x - - - subroutine d_oacc_asb_m(n, x, info) - use psb_base_mod - implicit none - integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - integer(psb_mpk_) :: nd - - info = psb_success_ - - if (x%is_dev()) then - nd = size(x%v) - if (nd < n) then - call x%sync() - call x%psb_d_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() - call x%set_host() - end if - else - if (size(x%v) < n) then - call x%psb_d_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() - call x%set_host() - end if - end if - end subroutine d_oacc_asb_m - - - - subroutine d_oacc_set_scal(x, val, first, last) - class(psb_d_vect_oacc), intent(inout) :: x - real(psb_dpk_), intent(in) :: val - integer(psb_ipk_), optional :: first, last - - integer(psb_ipk_) :: first_, last_ - first_ = 1 - last_ = x%get_nrows() - if (present(first)) first_ = max(1, first) - if (present(last)) last_ = min(last, last_) - - !$acc parallel loop - do i = first_, last_ - x%v(i) = val - end do - !$acc end parallel loop - - call x%set_dev() - end subroutine d_oacc_set_scal - - - - subroutine d_oacc_zero(x) - use psi_serial_mod - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - call x%set_dev() - call x%set_scal(dzero) - end subroutine d_oacc_zero - - function d_oacc_get_nrows(x) result(res) - implicit none - class(psb_d_vect_oacc), intent(in) :: x - integer(psb_ipk_) :: res - - if (allocated(x%v)) res = size(x%v) - end function d_oacc_get_nrows - - function d_oacc_get_fmt() result(res) - implicit none - character(len=5) :: res - res = "dOACC" - - end function d_oacc_get_fmt - - function d_oacc_vect_dot(n, x, y) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_), external :: ddot - integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - res = dzero - - select type(yy => y) - type is (psb_d_base_vect_type) - if (x%is_dev()) call x%sync() - res = ddot(n, x%v, 1, yy%v, 1) - type is (psb_d_vect_oacc) - if (x%is_host()) call x%sync() - if (yy%is_host()) call yy%sync() - - !$acc parallel loop reduction(+:res) present(x%v, yy%v) - do i = 1, n - res = res + x%v(i) * yy%v(i) - end do - !$acc end parallel loop - - class default - call x%sync() - res = y%dot(n, x%v) - end select - - end function d_oacc_vect_dot - - - - - function d_oacc_dot_a(n, x, y) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - real(psb_dpk_), intent(in) :: y(:) - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_), external :: ddot - - if (x%is_dev()) call x%sync() - res = ddot(n, y, 1, x%v, 1) - - end function d_oacc_dot_a - - ! subroutine d_oacc_set_vect(x,y) - ! implicit none - ! class(psb_d_vect_oacc), intent(inout) :: x - ! real(psb_dpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine d_oacc_set_vect - - subroutine d_oacc_to_dev(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update device(v) - end subroutine d_oacc_to_dev - - subroutine d_oacc_to_host(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update self(v) - end subroutine d_oacc_to_host - +!!$ subroutine d_oacc_absval2(x, y) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ integer(psb_ipk_) :: n +!!$ integer(psb_ipk_) :: i +!!$ +!!$ n = min(size(x%v), size(y%v)) +!!$ select type (yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ yy%v(i) = abs(x%v(i)) +!!$ end do +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ call x%psb_d_base_vect_type%absval(y) +!!$ end select +!!$ end subroutine d_oacc_absval2 +!!$ +!!$ subroutine d_oacc_scal(alpha, x) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: alpha +!!$ integer(psb_ipk_) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, size(x%v) +!!$ x%v(i) = alpha * x%v(i) +!!$ end do +!!$ call x%set_dev() +!!$ end subroutine d_oacc_scal +!!$ +!!$ function d_oacc_nrm2(n, x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ real(psb_dpk_) :: sum +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ sum = 0.0 +!!$ !$acc parallel loop reduction(+:sum) +!!$ do i = 1, n +!!$ sum = sum + x%v(i) * x%v(i) +!!$ end do +!!$ res = sqrt(sum) +!!$ end function d_oacc_nrm2 +!!$ +!!$ function d_oacc_amax(n, x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ real(psb_dpk_) :: max_val +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ max_val = -huge(0.0) +!!$ !$acc parallel loop reduction(max:max_val) +!!$ do i = 1, n +!!$ if (x%v(i) > max_val) max_val = x%v(i) +!!$ end do +!!$ res = max_val +!!$ end function d_oacc_amax +!!$ +!!$ function d_oacc_asum(n, x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ real(psb_dpk_) :: sum +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ sum = 0.0 +!!$ !$acc parallel loop reduction(+:sum) +!!$ do i = 1, n +!!$ sum = sum + abs(x%v(i)) +!!$ end do +!!$ res = sum +!!$ end function d_oacc_asum +!!$ +!!$ +!!$ subroutine d_oacc_mlt_v(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_d_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +!!$ end subroutine d_oacc_mlt_v +!!$ +!!$ subroutine d_oacc_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (y%is_dev()) call y%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, size(x) +!!$ y%v(i) = y%v(i) * x(i) +!!$ end do +!!$ call y%set_host() +!!$ end subroutine d_oacc_mlt_a +!!$ +!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, size(x) +!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end subroutine d_oacc_mlt_a_2 +!!$ +!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ class(psb_d_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ conjgx_ = .false. +!!$ conjgy_ = .false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') +!!$ +!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) +!!$ +!!$ info = 0 +!!$ select type(xx => x) +!!$ class is (psb_d_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ end subroutine d_oacc_mlt_v_2 +!!$ +!!$ +!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny, i +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_d_vect_oacc) +!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space() +!!$ if (xx%is_host()) call xx%sync_space() +!!$ nx = size(xx%v) +!!$ ny = size(y%v) +!!$ if ((nx < m) .or. (ny < m)) then +!!$ info = psb_err_internal_error_ +!!$ else +!!$ !$acc parallel loop +!!$ do i = 1, m +!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i) +!!$ end do +!!$ end if +!!$ call y%set_dev() +!!$ class default +!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() +!!$ call y%axpby(m, alpha, x%v, beta, info) +!!$ end select +!!$ end subroutine d_oacc_axpby_v +!!$ +!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, m +!!$ y%v(i) = alpha * x(i) + beta * y%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end subroutine d_oacc_axpby_a +!!$ +!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ class(psb_d_vect_oacc), intent(inout) :: z +!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny, nz, i +!!$ logical :: gpu_done +!!$ +!!$ info = psb_success_ +!!$ gpu_done = .false. +!!$ +!!$ select type(xx => x) +!!$ class is (psb_d_vect_oacc) +!!$ select type(yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ select type(zz => z) +!!$ class is (psb_d_vect_oacc) +!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() +!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() +!!$ if (xx%is_host()) call xx%sync_space() +!!$ nx = size(xx%v) +!!$ ny = size(yy%v) +!!$ nz = size(zz%v) +!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then +!!$ info = psb_err_internal_error_ +!!$ else +!!$ !$acc parallel loop +!!$ do i = 1, m +!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) +!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) +!!$ end do +!!$ end if +!!$ call yy%set_dev() +!!$ call zz%set_dev() +!!$ gpu_done = .true. +!!$ end select +!!$ end select +!!$ end select +!!$ +!!$ if (.not. gpu_done) then +!!$ if (x%is_host()) call x%sync() +!!$ if (y%is_host()) call y%sync() +!!$ if (z%is_host()) call z%sync() +!!$ call y%axpby(m, alpha, x, beta, info) +!!$ call z%axpby(m, gamma, y, delta, info) +!!$ end if +!!$ end subroutine d_oacc_abgdxyz +!!$ +!!$ +!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_vect_type) :: idx +!!$ real(psb_dpk_) :: beta +!!$ class(psb_d_vect_oacc) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (.not.allocated(y%combuf)) then +!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') +!!$ return +!!$ end if +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ if (y%is_host()) call y%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) +!!$ end do +!!$ +!!$ class default +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) +!!$ end do +!!$ end select +!!$ end subroutine d_oacc_sctb_buf +!!$ +!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_):: i, n +!!$ class(psb_i_base_vect_type) :: idx +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_vect_oacc) :: y +!!$ integer(psb_ipk_) :: info, ni +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ class default +!!$ call psb_errpush(info, 'd_oacc_sctb_x') +!!$ return +!!$ end select +!!$ +!!$ if (y%is_host()) call y%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) +!!$ end do +!!$ +!!$ call y%set_dev() +!!$ end subroutine d_oacc_sctb_x +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_sctb(n, idx, x, beta, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: n +!!$ integer(psb_ipk_) :: idx(:) +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_vect_oacc) :: y +!!$ integer(psb_ipk_) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (n == 0) return +!!$ if (y%is_dev()) call y%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i) +!!$ end do +!!$ +!!$ call y%set_host() +!!$ end subroutine d_oacc_sctb +!!$ +!!$ +!!$ subroutine d_oacc_gthzbuf(i, n, idx, x) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_vect_type) :: idx +!!$ class(psb_d_vect_oacc) :: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ info = 0 +!!$ if (.not.allocated(x%combuf)) then +!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') +!!$ return +!!$ end if +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ class default +!!$ call psb_errpush(info, 'd_oacc_gthzbuf') +!!$ return +!!$ end select +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ x%combuf(i) = x%v(idx%v(i)) +!!$ end do +!!$ end subroutine d_oacc_gthzbuf +!!$ +!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_vect_type):: idx +!!$ real(psb_dpk_) :: y(:) +!!$ class(psb_d_vect_oacc):: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ info = 0 +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ class default +!!$ call psb_errpush(info, 'd_oacc_gthzv_x') +!!$ return +!!$ end select +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y(i) = x%v(idx%v(i)) +!!$ end do +!!$ end subroutine d_oacc_gthzv_x +!!$ +!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n, dupl +!!$ class(psb_i_base_vect_type), intent(inout) :: irl +!!$ class(psb_d_base_vect_type), intent(inout) :: val +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, isz +!!$ logical :: done_oacc +!!$ +!!$ info = 0 +!!$ if (psb_errstatus_fatal()) return +!!$ +!!$ done_oacc = .false. +!!$ select type(virl => irl) +!!$ type is (psb_i_vect_oacc) +!!$ select type(vval => val) +!!$ type is (psb_d_vect_oacc) +!!$ if (vval%is_host()) call vval%sync_space() +!!$ if (virl%is_host()) call virl%sync_space(info) +!!$ if (x%is_host()) call x%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ x%v(virl%v(i)) = vval%v(i) +!!$ end do +!!$ call x%set_dev() +!!$ done_oacc = .true. +!!$ end select +!!$ end select +!!$ +!!$ if (.not.done_oacc) then +!!$ select type(virl => irl) +!!$ type is (psb_i_vect_oacc) +!!$ if (virl%is_dev()) call virl%sync_space(info) +!!$ end select +!!$ select type(vval => val) +!!$ type is (psb_d_vect_oacc) +!!$ if (vval%is_dev()) call vval%sync_space() +!!$ end select +!!$ call x%ins(n, irl%v, val%v, dupl, info) +!!$ end if +!!$ +!!$ if (info /= 0) then +!!$ call psb_errpush(info, 'oacc_vect_ins') +!!$ return +!!$ end if +!!$ +!!$ end subroutine d_oacc_ins_v +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n, dupl +!!$ integer(psb_ipk_), intent(in) :: irl(:) +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i +!!$ +!!$ info = 0 +!!$ if (x%is_dev()) call x%sync_space() +!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) +!!$ call x%set_host() +!!$ !$acc update device(x%v) +!!$ +!!$ end subroutine d_oacc_ins_a +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_bld_mn(x, n) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_mpk_), intent(in) :: n +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ call x%all(n, info) +!!$ if (info /= 0) then +!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) +!!$ end if +!!$ call x%set_host() +!!$ !$acc update device(x%v) +!!$ +!!$ end subroutine d_oacc_bld_mn +!!$ +!!$ +!!$ subroutine d_oacc_bld_x(x, this) +!!$ use psb_base_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: this(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ call psb_realloc(size(this), x%v, info) +!!$ if (info /= 0) then +!!$ info = psb_err_alloc_request_ +!!$ call psb_errpush(info, 'd_oacc_bld_x', & +!!$ i_err=(/size(this), izero, izero, izero, izero/)) +!!$ return +!!$ end if +!!$ +!!$ x%v(:) = this(:) +!!$ call x%set_host() +!!$ !$acc update device(x%v) +!!$ +!!$ end subroutine d_oacc_bld_x +!!$ +!!$ +!!$ subroutine d_oacc_asb_m(n, x, info) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_mpk_), intent(in) :: n +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_mpk_) :: nd +!!$ +!!$ info = psb_success_ +!!$ +!!$ if (x%is_dev()) then +!!$ nd = size(x%v) +!!$ if (nd < n) then +!!$ call x%sync() +!!$ call x%psb_d_base_vect_type%asb(n, info) +!!$ if (info == psb_success_) call x%sync_space() +!!$ call x%set_host() +!!$ end if +!!$ else +!!$ if (size(x%v) < n) then +!!$ call x%psb_d_base_vect_type%asb(n, info) +!!$ if (info == psb_success_) call x%sync_space() +!!$ call x%set_host() +!!$ end if +!!$ end if +!!$ end subroutine d_oacc_asb_m +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_set_scal(x, val, first, last) +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: val +!!$ integer(psb_ipk_), optional :: first, last +!!$ +!!$ integer(psb_ipk_) :: first_, last_ +!!$ first_ = 1 +!!$ last_ = x%get_nrows() +!!$ if (present(first)) first_ = max(1, first) +!!$ if (present(last)) last_ = min(last, last_) +!!$ +!!$ !$acc parallel loop +!!$ do i = first_, last_ +!!$ x%v(i) = val +!!$ end do +!!$ !$acc end parallel loop +!!$ +!!$ call x%set_dev() +!!$ end subroutine d_oacc_set_scal +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_zero(x) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ call x%set_dev() +!!$ call x%set_scal(dzero) +!!$ end subroutine d_oacc_zero +!!$ +!!$ function d_oacc_get_nrows(x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(in) :: x +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (allocated(x%v)) res = size(x%v) +!!$ end function d_oacc_get_nrows +!!$ +!!$ function d_oacc_get_fmt() result(res) +!!$ implicit none +!!$ character(len=5) :: res +!!$ res = "dOACC" +!!$ +!!$ end function d_oacc_get_fmt +!!$ +!!$ function d_oacc_vect_dot(n, x, y) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ res = dzero +!!$ +!!$ select type(yy => y) +!!$ type is (psb_d_base_vect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n, x%v, 1, yy%v, 1) +!!$ type is (psb_d_vect_oacc) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ +!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v) +!!$ do i = 1, n +!!$ res = res + x%v(i) * yy%v(i) +!!$ end do +!!$ !$acc end parallel loop +!!$ +!!$ class default +!!$ call x%sync() +!!$ res = y%dot(n, x%v) +!!$ end select +!!$ +!!$ end function d_oacc_vect_dot +!!$ +!!$ +!!$ +!!$ +!!$ function d_oacc_dot_a(n, x, y) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n, y, 1, x%v, 1) +!!$ +!!$ end function d_oacc_dot_a +!!$ +!!$ ! subroutine d_oacc_set_vect(x,y) +!!$ ! implicit none +!!$ ! class(psb_d_vect_oacc), intent(inout) :: x +!!$ ! real(psb_dpk_), intent(in) :: y(:) +!!$ ! integer(psb_ipk_) :: info +!!$ +!!$ ! if (size(x%v) /= size(y)) then +!!$ ! call x%free(info) +!!$ ! call x%all(size(y),info) +!!$ ! end if +!!$ ! x%v(:) = y(:) +!!$ ! call x%set_host() +!!$ ! end subroutine d_oacc_set_vect +!!$ +!!$ subroutine d_oacc_to_dev(v) +!!$ implicit none +!!$ real(psb_dpk_) :: v(:) +!!$ !$acc update device(v) +!!$ end subroutine d_oacc_to_dev +!!$ +!!$ subroutine d_oacc_to_host(v) +!!$ implicit none +!!$ real(psb_dpk_) :: v(:) +!!$ !$acc update self(v) +!!$ end subroutine d_oacc_to_host +!!$ subroutine d_oacc_sync_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -871,96 +869,96 @@ contains res = (x%state == is_sync) end function d_oacc_is_sync - - subroutine d_oacc_vect_all(n, x, info) - use psi_serial_mod - use psb_realloc_mod - implicit none - integer(psb_ipk_), intent(in) :: n - class(psb_d_vect_oacc), intent(out) :: x - integer(psb_ipk_), intent(out) :: info - - call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - !$acc enter data create(x%v) - call x%sync_space() - end if - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'd_oacc_all', & - i_err=(/n, n, n, n, n/)) - end if - end subroutine d_oacc_vect_all - - - subroutine d_oacc_vect_free(x, info) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize - deallocate(x%v, stat=info) - end if - - end subroutine d_oacc_vect_free - - function oacc_get_size(x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: res - - if (x%is_dev()) call x%sync() - res = size(x%v) - end function oacc_get_size - - - subroutine initialize(N) - integer(psb_ipk_) :: N - integer(psb_ipk_) :: i - allocate(v1(N),v2(N),p(N)) - !$acc enter data create(v1,v2,p) - !$acc parallel - !$acc loop - do i=1,n - v1(i) = i - v2(i) = n+i - end do - !$acc end parallel - end subroutine initialize - subroutine finalize_dev() - !$acc exit data delete(v1,v2,p) - end subroutine finalize_dev - subroutine finalize_host() - deallocate(v1,v2,p) - end subroutine finalize_host - subroutine to_dev() - !$acc update device(v1,v2) - end subroutine to_dev - subroutine to_host() - !$acc update self(v1,v2) - end subroutine to_host - function d_dot(N) result(res) - real(kind(1.d0)) :: res - integer(psb_ipk_) :: i,N - real(kind(1.d0)) :: t1,t2,t3 - res = 0.0d0 - !$acc parallel - !$acc loop reduction(+:res) - do i=1,N - res = res + v1(i) * v2(i) - end do - !$acc end parallel - - end function d_dot - function h_dot(N) result(res) - integer(psb_ipk_) :: i,N - real(kind(1.d0)) :: t1,t2,t3,res - res = 0.0d0 - do i=1,N - res = res + v1(i) * v2(i) - end do - end function h_dot - -end module psb_d_oacc_vect_mod \ No newline at end of file +!!$ +!!$ subroutine d_oacc_vect_all(n, x, info) +!!$ use psi_serial_mod +!!$ use psb_realloc_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: n +!!$ class(psb_d_vect_oacc), intent(out) :: x +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ call psb_realloc(n, x%v, info) +!!$ if (info == 0) then +!!$ call x%set_host() +!!$ !$acc enter data create(x%v) +!!$ call x%sync_space() +!!$ end if +!!$ if (info /= 0) then +!!$ info = psb_err_alloc_request_ +!!$ call psb_errpush(info, 'd_oacc_all', & +!!$ i_err=(/n, n, n, n, n/)) +!!$ end if +!!$ end subroutine d_oacc_vect_all +!!$ +!!$ +!!$ subroutine d_oacc_vect_free(x, info) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(out) :: info +!!$ info = 0 +!!$ if (allocated(x%v)) then +!!$ !$acc exit data delete(x%v) finalize +!!$ deallocate(x%v, stat=info) +!!$ end if +!!$ +!!$ end subroutine d_oacc_vect_free +!!$ +!!$ function oacc_get_size(x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = size(x%v) +!!$ end function oacc_get_size +!!$ +!!$ +!!$ subroutine initialize(N) +!!$ integer(psb_ipk_) :: N +!!$ integer(psb_ipk_) :: i +!!$ allocate(v1(N),v2(N),p(N)) +!!$ !$acc enter data create(v1,v2,p) +!!$ !$acc parallel +!!$ !$acc loop +!!$ do i=1,n +!!$ v1(i) = i +!!$ v2(i) = n+i +!!$ end do +!!$ !$acc end parallel +!!$ end subroutine initialize +!!$ subroutine finalize_dev() +!!$ !$acc exit data delete(v1,v2,p) +!!$ end subroutine finalize_dev +!!$ subroutine finalize_host() +!!$ deallocate(v1,v2,p) +!!$ end subroutine finalize_host +!!$ subroutine to_dev() +!!$ !$acc update device(v1,v2) +!!$ end subroutine to_dev +!!$ subroutine to_host() +!!$ !$acc update self(v1,v2) +!!$ end subroutine to_host +!!$ function d_dot(N) result(res) +!!$ real(kind(1.d0)) :: res +!!$ integer(psb_ipk_) :: i,N +!!$ real(kind(1.d0)) :: t1,t2,t3 +!!$ res = 0.0d0 +!!$ !$acc parallel +!!$ !$acc loop reduction(+:res) +!!$ do i=1,N +!!$ res = res + v1(i) * v2(i) +!!$ end do +!!$ !$acc end parallel +!!$ +!!$ end function d_dot +!!$ function h_dot(N) result(res) +!!$ integer(psb_ipk_) :: i,N +!!$ real(kind(1.d0)) :: t1,t2,t3,res +!!$ res = 0.0d0 +!!$ do i=1,N +!!$ res = res + v1(i) * v2(i) +!!$ end do +!!$ end function h_dot +!!$ +end module psb_d_oacc_vect_mod From e0d7091ecc81f3727ae4f75e170dea82d1948e13 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 10:04:32 +0200 Subject: [PATCH 04/39] Makefile in openacc/impl --- openacc/impl/Makefile | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100755 openacc/impl/Makefile diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile new file mode 100755 index 00000000..fff48657 --- /dev/null +++ b/openacc/impl/Makefile @@ -0,0 +1,29 @@ +include ../../Make.inc +LIBDIR=../../lib +INCDIR=../../include +MODDIR=../../modules +# +# Compilers and such +# +#CCOPT= -g +FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. +LIBNAME=libpsb_openacc.a + +OBJS= psb_d_oacc_csr_vect_mv.o + +objs: $(OBJS) + +lib: objs + ar cur ../$(LIBNAME) $(OBJS) + +clean: + /bin/rm -f $(OBJS) + +.c.o: + $(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@ From 6be998ac66c5a0869f10e58ace3ee2f2aec5cb8e Mon Sep 17 00:00:00 2001 From: tloloum Date: Tue, 9 Jul 2024 11:22:28 +0200 Subject: [PATCH 05/39] oacc_env_mod --- openacc/Makefile | 3 ++- openacc/psb_oacc_env_mod.F90 | 18 ++++++++++++++++++ openacc/psb_oacc_mod.F90 | 2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 openacc/psb_oacc_env_mod.F90 diff --git a/openacc/Makefile b/openacc/Makefile index 3590249c..aae571ad 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -11,7 +11,7 @@ OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" LIBDIR=../lib INCDIR=../include MODDIR=../modules -IMPLDIR=./impl # Adding the impl directory +IMPLDIR=./impl # Include and library paths INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR) @@ -20,6 +20,7 @@ LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis # Source files FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ + psb_oacc_env_mod.o \ impl/psb_d_oacc_csr_vect_mv.o # Library name diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90 new file mode 100644 index 00000000..83c9426d --- /dev/null +++ b/openacc/psb_oacc_env_mod.F90 @@ -0,0 +1,18 @@ +module psb_oacc_env_mod +contains + + subroutine psb_oacc_init(ctxt, dev) + use psb_penv_mod + use psb_const_mod + use psb_error_mod + type(psb_ctxt_type), intent(in) :: ctxt + integer, intent(in), optional :: dev + + end subroutine psb_oacc_init + + subroutine psb_oacc_exit() + integer :: res + + end subroutine psb_oacc_exit + +end module psb_oacc_env_mod diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90 index ce5e85f9..fe827db8 100644 --- a/openacc/psb_oacc_mod.F90 +++ b/openacc/psb_oacc_mod.F90 @@ -1,6 +1,8 @@ module psb_oacc_mod use psb_const_mod + use psb_oacc_env_mod + use psb_d_oacc_vect_mod use psb_d_oacc_csr_mat_mod From 1f0e5918270e22d1f3dcf0f4bea0e299192ee2c5 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 13:16:36 +0200 Subject: [PATCH 06/39] Reworked oacc_vect_mod. oacc_mlt_v_X generates ICE, to be fixed --- openacc/impl/Makefile | 2 +- openacc/impl/psb_d_oacc_mlt_v.f90 | 31 + openacc/impl/psb_d_oacc_mlt_v_2.f90 | 55 + openacc/psb_d_oacc_vect_mod.F90 | 1487 ++++++++++++++------------- 4 files changed, 831 insertions(+), 744 deletions(-) create mode 100644 openacc/impl/psb_d_oacc_mlt_v.f90 create mode 100644 openacc/impl/psb_d_oacc_mlt_v_2.f90 diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index fff48657..7bab6654 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -9,7 +9,7 @@ MODDIR=../../modules FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. LIBNAME=libpsb_openacc.a -OBJS= psb_d_oacc_csr_vect_mv.o +OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 new file mode 100644 index 00000000..a4eb6660 --- /dev/null +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -0,0 +1,31 @@ + +subroutine d_oacc_mlt_v(x, y, info) + use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v + + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_d_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +end subroutine d_oacc_mlt_v diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..b59b4f56 --- /dev/null +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -0,0 +1,55 @@ +subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2 + use psb_string_mod + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + + info = 0 +!!$ select type(xx => x) +!!$ class is (psb_d_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +end subroutine d_oacc_mlt_v_2 + diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index eda804ce..3139f9b8 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -5,63 +5,87 @@ module psb_d_oacc_vect_mod use psb_d_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod - + integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 - + type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc integer :: state = is_host contains -!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows -!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt -!!$ -!!$ procedure, pass(x) :: all => d_oacc_vect_all -!!$ procedure, pass(x) :: zero => d_oacc_zero -!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m + procedure, pass(x) :: get_nrows => d_oacc_get_nrows + procedure, nopass :: get_fmt => d_oacc_get_fmt + + procedure, pass(x) :: all => d_oacc_vect_all + procedure, pass(x) :: zero => d_oacc_zero + procedure, pass(x) :: asb_m => d_oacc_asb_m procedure, pass(x) :: sync => d_oacc_sync procedure, pass(x) :: sync_space => d_oacc_sync_space -!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x -!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn -!!$ procedure, pass(x) :: free => d_oacc_vect_free -!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a -!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v + procedure, pass(x) :: bld_x => d_oacc_bld_x + procedure, pass(x) :: bld_mn => d_oacc_bld_mn + procedure, pass(x) :: free => d_oacc_vect_free + procedure, pass(x) :: ins_a => d_oacc_ins_a + procedure, pass(x) :: ins_v => d_oacc_ins_v procedure, pass(x) :: is_host => d_oacc_is_host procedure, pass(x) :: is_dev => d_oacc_is_dev procedure, pass(x) :: is_sync => d_oacc_is_sync procedure, pass(x) :: set_host => d_oacc_set_host procedure, pass(x) :: set_dev => d_oacc_set_dev procedure, pass(x) :: set_sync => d_oacc_set_sync -!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal -!!$ -!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x -!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf -!!$ procedure, pass(y) :: sctb => d_oacc_sctb -!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x -!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf -!!$ -!!$ procedure, pass(x) :: get_size => oacc_get_size -!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot -!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a -!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v -!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a -!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz -!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v -!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a -!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 -!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 -!!$ procedure, pass(x) :: scal => d_oacc_scal -!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2 -!!$ procedure, pass(x) :: amax => d_oacc_amax -!!$ procedure, pass(x) :: asum => d_oacc_asum + procedure, pass(x) :: set_scal => d_oacc_set_scal + + procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf + procedure, pass(y) :: sctb => d_oacc_sctb + procedure, pass(y) :: sctb_x => d_oacc_sctb_x + procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf + + procedure, pass(x) :: get_size => oacc_get_size + procedure, pass(x) :: dot_v => d_oacc_vect_dot + procedure, pass(x) :: dot_a => d_oacc_dot_a + procedure, pass(y) :: axpby_v => d_oacc_axpby_v + procedure, pass(y) :: axpby_a => d_oacc_axpby_a + procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz + procedure, pass(y) :: mlt_a => d_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 + procedure, pass(x) :: scal => d_oacc_scal + procedure, pass(x) :: nrm2 => d_oacc_nrm2 + procedure, pass(x) :: amax => d_oacc_amax + procedure, pass(x) :: asum => d_oacc_asum procedure, pass(x) :: absval1 => d_oacc_absval1 -!!$ procedure, pass(x) :: absval2 => d_oacc_absval2 + procedure, pass(x) :: absval2 => d_oacc_absval2 end type psb_d_vect_oacc real(psb_dpk_), allocatable :: v1(:),v2(:),p(:) + interface + subroutine d_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine d_oacc_mlt_v + end interface + + + interface + subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + import + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine d_oacc_mlt_v_2 + end interface + contains subroutine d_oacc_absval1(x) @@ -73,106 +97,140 @@ contains n = size(x%v) !$acc parallel loop do i = 1, n - x%v(i) = abs(x%v(i)) + x%v(i) = abs(x%v(i)) end do call x%set_dev() end subroutine d_oacc_absval1 -!!$ subroutine d_oacc_absval2(x, y) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ integer(psb_ipk_) :: n -!!$ integer(psb_ipk_) :: i -!!$ -!!$ n = min(size(x%v), size(y%v)) -!!$ select type (yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ if (x%is_host()) call x%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ yy%v(i) = abs(x%v(i)) -!!$ end do -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ call x%psb_d_base_vect_type%absval(y) -!!$ end select -!!$ end subroutine d_oacc_absval2 -!!$ -!!$ subroutine d_oacc_scal(alpha, x) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: alpha -!!$ integer(psb_ipk_) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, size(x%v) -!!$ x%v(i) = alpha * x%v(i) -!!$ end do -!!$ call x%set_dev() -!!$ end subroutine d_oacc_scal -!!$ -!!$ function d_oacc_nrm2(n, x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ integer(psb_ipk_) :: info -!!$ real(psb_dpk_) :: sum -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ sum = 0.0 -!!$ !$acc parallel loop reduction(+:sum) -!!$ do i = 1, n -!!$ sum = sum + x%v(i) * x%v(i) -!!$ end do -!!$ res = sqrt(sum) -!!$ end function d_oacc_nrm2 -!!$ -!!$ function d_oacc_amax(n, x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ integer(psb_ipk_) :: info -!!$ real(psb_dpk_) :: max_val -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ max_val = -huge(0.0) -!!$ !$acc parallel loop reduction(max:max_val) -!!$ do i = 1, n -!!$ if (x%v(i) > max_val) max_val = x%v(i) -!!$ end do -!!$ res = max_val -!!$ end function d_oacc_amax -!!$ -!!$ function d_oacc_asum(n, x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ integer(psb_ipk_) :: info -!!$ real(psb_dpk_) :: sum -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ sum = 0.0 -!!$ !$acc parallel loop reduction(+:sum) -!!$ do i = 1, n -!!$ sum = sum + abs(x%v(i)) -!!$ end do -!!$ res = sum -!!$ end function d_oacc_asum -!!$ -!!$ + subroutine d_oacc_absval2(x, y) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + !$acc parallel loop + do i = 1, n + yy%v(i) = abs(x%v(i)) + end do + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_d_base_vect_type%absval(y) + end select + end subroutine d_oacc_absval2 + + subroutine d_oacc_scal(alpha, x) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: alpha + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, size(x%v) + x%v(i) = alpha * x%v(i) + end do + call x%set_dev() + end subroutine d_oacc_scal + + function d_oacc_nrm2(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + x%v(i) * x%v(i) + end do + res = sqrt(sum) + end function d_oacc_nrm2 + + function d_oacc_amax(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (x%v(i) > max_val) max_val = x%v(i) + end do + res = max_val + end function d_oacc_amax + + function d_oacc_asum(n, x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i)) + end do + res = sum + end function d_oacc_asum + + + subroutine d_oacc_mlt_a(x, y, info) + implicit none + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync_space() + !$acc parallel loop + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine d_oacc_mlt_a + + subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync_space() + !$acc parallel loop + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine d_oacc_mlt_a_2 + + !!$ subroutine d_oacc_mlt_v(x, y, info) -!!$ use psi_serial_mod !!$ implicit none !!$ class(psb_d_base_vect_type), intent(inout) :: x !!$ class(psb_d_vect_oacc), intent(inout) :: y @@ -201,42 +259,6 @@ contains !!$ end select !!$ end subroutine d_oacc_mlt_v !!$ -!!$ subroutine d_oacc_mlt_a(x, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ if (y%is_dev()) call y%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, size(x) -!!$ y%v(i) = y%v(i) * x(i) -!!$ end do -!!$ call y%set_host() -!!$ end subroutine d_oacc_mlt_a -!!$ -!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ real(psb_dpk_), intent(in) :: y(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ if (z%is_dev()) call z%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, size(x) -!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end subroutine d_oacc_mlt_a_2 -!!$ !!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) !!$ use psi_serial_mod !!$ use psb_string_mod @@ -291,514 +313,513 @@ contains !!$ call z%set_host() !!$ end select !!$ end subroutine d_oacc_mlt_v_2 -!!$ -!!$ -!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: nx, ny, i -!!$ -!!$ info = psb_success_ -!!$ -!!$ select type(xx => x) -!!$ type is (psb_d_vect_oacc) -!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space() -!!$ if (xx%is_host()) call xx%sync_space() -!!$ nx = size(xx%v) -!!$ ny = size(y%v) -!!$ if ((nx < m) .or. (ny < m)) then -!!$ info = psb_err_internal_error_ -!!$ else -!!$ !$acc parallel loop -!!$ do i = 1, m -!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i) -!!$ end do -!!$ end if -!!$ call y%set_dev() -!!$ class default -!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() -!!$ call y%axpby(m, alpha, x%v, beta, info) -!!$ end select -!!$ end subroutine d_oacc_axpby_v -!!$ -!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ real(psb_dpk_), intent(in) :: x(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, m -!!$ y%v(i) = alpha * x(i) + beta * y%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end subroutine d_oacc_axpby_a -!!$ -!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: m -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ class(psb_d_vect_oacc), intent(inout) :: z -!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_ipk_) :: nx, ny, nz, i -!!$ logical :: gpu_done -!!$ -!!$ info = psb_success_ -!!$ gpu_done = .false. -!!$ -!!$ select type(xx => x) -!!$ class is (psb_d_vect_oacc) -!!$ select type(yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ select type(zz => z) -!!$ class is (psb_d_vect_oacc) -!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() -!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() -!!$ if (xx%is_host()) call xx%sync_space() -!!$ nx = size(xx%v) -!!$ ny = size(yy%v) -!!$ nz = size(zz%v) -!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then -!!$ info = psb_err_internal_error_ -!!$ else -!!$ !$acc parallel loop -!!$ do i = 1, m -!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) -!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) -!!$ end do -!!$ end if -!!$ call yy%set_dev() -!!$ call zz%set_dev() -!!$ gpu_done = .true. -!!$ end select -!!$ end select -!!$ end select -!!$ -!!$ if (.not. gpu_done) then -!!$ if (x%is_host()) call x%sync() -!!$ if (y%is_host()) call y%sync() -!!$ if (z%is_host()) call z%sync() -!!$ call y%axpby(m, alpha, x, beta, info) -!!$ call z%axpby(m, gamma, y, delta, info) -!!$ end if -!!$ end subroutine d_oacc_abgdxyz -!!$ -!!$ -!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: i, n -!!$ class(psb_i_base_vect_type) :: idx -!!$ real(psb_dpk_) :: beta -!!$ class(psb_d_vect_oacc) :: y -!!$ integer(psb_ipk_) :: info -!!$ -!!$ if (.not.allocated(y%combuf)) then -!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') -!!$ return -!!$ end if -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ if (y%is_host()) call y%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) -!!$ end do -!!$ -!!$ class default -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) -!!$ end do -!!$ end select -!!$ end subroutine d_oacc_sctb_buf -!!$ -!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_):: i, n -!!$ class(psb_i_base_vect_type) :: idx -!!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_vect_oacc) :: y -!!$ integer(psb_ipk_) :: info, ni -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ class default -!!$ call psb_errpush(info, 'd_oacc_sctb_x') -!!$ return -!!$ end select -!!$ -!!$ if (y%is_host()) call y%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) -!!$ end do -!!$ -!!$ call y%set_dev() -!!$ end subroutine d_oacc_sctb_x -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_sctb(n, idx, x, beta, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: n -!!$ integer(psb_ipk_) :: idx(:) -!!$ real(psb_dpk_) :: beta, x(:) -!!$ class(psb_d_vect_oacc) :: y -!!$ integer(psb_ipk_) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ if (n == 0) return -!!$ if (y%is_dev()) call y%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i) -!!$ end do -!!$ -!!$ call y%set_host() -!!$ end subroutine d_oacc_sctb -!!$ -!!$ -!!$ subroutine d_oacc_gthzbuf(i, n, idx, x) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: i, n -!!$ class(psb_i_base_vect_type) :: idx -!!$ class(psb_d_vect_oacc) :: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ info = 0 -!!$ if (.not.allocated(x%combuf)) then -!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') -!!$ return -!!$ end if -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ class default -!!$ call psb_errpush(info, 'd_oacc_gthzbuf') -!!$ return -!!$ end select -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ x%combuf(i) = x%v(idx%v(i)) -!!$ end do -!!$ end subroutine d_oacc_gthzbuf -!!$ -!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_ipk_) :: i, n -!!$ class(psb_i_base_vect_type):: idx -!!$ real(psb_dpk_) :: y(:) -!!$ class(psb_d_vect_oacc):: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ info = 0 -!!$ -!!$ select type(ii => idx) -!!$ class is (psb_i_vect_oacc) -!!$ if (ii%is_host()) call ii%sync_space(info) -!!$ class default -!!$ call psb_errpush(info, 'd_oacc_gthzv_x') -!!$ return -!!$ end select -!!$ -!!$ if (x%is_host()) call x%sync_space() -!!$ -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y(i) = x%v(idx%v(i)) -!!$ end do -!!$ end subroutine d_oacc_gthzv_x -!!$ -!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n, dupl -!!$ class(psb_i_base_vect_type), intent(inout) :: irl -!!$ class(psb_d_base_vect_type), intent(inout) :: val -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, isz -!!$ logical :: done_oacc -!!$ -!!$ info = 0 -!!$ if (psb_errstatus_fatal()) return -!!$ -!!$ done_oacc = .false. -!!$ select type(virl => irl) -!!$ type is (psb_i_vect_oacc) -!!$ select type(vval => val) -!!$ type is (psb_d_vect_oacc) -!!$ if (vval%is_host()) call vval%sync_space() -!!$ if (virl%is_host()) call virl%sync_space(info) -!!$ if (x%is_host()) call x%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ x%v(virl%v(i)) = vval%v(i) -!!$ end do -!!$ call x%set_dev() -!!$ done_oacc = .true. -!!$ end select -!!$ end select -!!$ -!!$ if (.not.done_oacc) then -!!$ select type(virl => irl) -!!$ type is (psb_i_vect_oacc) -!!$ if (virl%is_dev()) call virl%sync_space(info) -!!$ end select -!!$ select type(vval => val) -!!$ type is (psb_d_vect_oacc) -!!$ if (vval%is_dev()) call vval%sync_space() -!!$ end select -!!$ call x%ins(n, irl%v, val%v, dupl, info) -!!$ end if -!!$ -!!$ if (info /= 0) then -!!$ call psb_errpush(info, 'oacc_vect_ins') -!!$ return -!!$ end if -!!$ -!!$ end subroutine d_oacc_ins_v -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) -!!$ use psi_serial_mod -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(in) :: n, dupl -!!$ integer(psb_ipk_), intent(in) :: irl(:) -!!$ real(psb_dpk_), intent(in) :: val(:) -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i -!!$ -!!$ info = 0 -!!$ if (x%is_dev()) call x%sync_space() -!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) -!!$ call x%set_host() -!!$ !$acc update device(x%v) -!!$ -!!$ end subroutine d_oacc_ins_a -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_bld_mn(x, n) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_mpk_), intent(in) :: n -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ call x%all(n, info) -!!$ if (info /= 0) then -!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) -!!$ end if -!!$ call x%set_host() -!!$ !$acc update device(x%v) -!!$ -!!$ end subroutine d_oacc_bld_mn -!!$ -!!$ -!!$ subroutine d_oacc_bld_x(x, this) -!!$ use psb_base_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: this(:) -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_) :: info -!!$ -!!$ call psb_realloc(size(this), x%v, info) -!!$ if (info /= 0) then -!!$ info = psb_err_alloc_request_ -!!$ call psb_errpush(info, 'd_oacc_bld_x', & -!!$ i_err=(/size(this), izero, izero, izero, izero/)) -!!$ return -!!$ end if -!!$ -!!$ x%v(:) = this(:) -!!$ call x%set_host() -!!$ !$acc update device(x%v) -!!$ -!!$ end subroutine d_oacc_bld_x -!!$ -!!$ -!!$ subroutine d_oacc_asb_m(n, x, info) -!!$ use psb_base_mod -!!$ implicit none -!!$ integer(psb_mpk_), intent(in) :: n -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(out) :: info -!!$ integer(psb_mpk_) :: nd -!!$ -!!$ info = psb_success_ -!!$ -!!$ if (x%is_dev()) then -!!$ nd = size(x%v) -!!$ if (nd < n) then -!!$ call x%sync() -!!$ call x%psb_d_base_vect_type%asb(n, info) -!!$ if (info == psb_success_) call x%sync_space() -!!$ call x%set_host() -!!$ end if -!!$ else -!!$ if (size(x%v) < n) then -!!$ call x%psb_d_base_vect_type%asb(n, info) -!!$ if (info == psb_success_) call x%sync_space() -!!$ call x%set_host() -!!$ end if -!!$ end if -!!$ end subroutine d_oacc_asb_m -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_set_scal(x, val, first, last) -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: val -!!$ integer(psb_ipk_), optional :: first, last -!!$ -!!$ integer(psb_ipk_) :: first_, last_ -!!$ first_ = 1 -!!$ last_ = x%get_nrows() -!!$ if (present(first)) first_ = max(1, first) -!!$ if (present(last)) last_ = min(last, last_) -!!$ -!!$ !$acc parallel loop -!!$ do i = first_, last_ -!!$ x%v(i) = val -!!$ end do -!!$ !$acc end parallel loop -!!$ -!!$ call x%set_dev() -!!$ end subroutine d_oacc_set_scal -!!$ -!!$ -!!$ -!!$ subroutine d_oacc_zero(x) -!!$ use psi_serial_mod -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ call x%set_dev() -!!$ call x%set_scal(dzero) -!!$ end subroutine d_oacc_zero -!!$ -!!$ function d_oacc_get_nrows(x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(in) :: x -!!$ integer(psb_ipk_) :: res -!!$ -!!$ if (allocated(x%v)) res = size(x%v) -!!$ end function d_oacc_get_nrows -!!$ -!!$ function d_oacc_get_fmt() result(res) -!!$ implicit none -!!$ character(len=5) :: res -!!$ res = "dOACC" -!!$ -!!$ end function d_oacc_get_fmt -!!$ -!!$ function d_oacc_vect_dot(n, x, y) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ real(psb_dpk_), external :: ddot -!!$ integer(psb_ipk_) :: info -!!$ integer(psb_ipk_) :: i -!!$ -!!$ res = dzero -!!$ -!!$ select type(yy => y) -!!$ type is (psb_d_base_vect_type) -!!$ if (x%is_dev()) call x%sync() -!!$ res = ddot(n, x%v, 1, yy%v, 1) -!!$ type is (psb_d_vect_oacc) -!!$ if (x%is_host()) call x%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ -!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v) -!!$ do i = 1, n -!!$ res = res + x%v(i) * yy%v(i) -!!$ end do -!!$ !$acc end parallel loop -!!$ -!!$ class default -!!$ call x%sync() -!!$ res = y%dot(n, x%v) -!!$ end select -!!$ -!!$ end function d_oacc_vect_dot -!!$ -!!$ -!!$ -!!$ -!!$ function d_oacc_dot_a(n, x, y) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ real(psb_dpk_), intent(in) :: y(:) -!!$ integer(psb_ipk_), intent(in) :: n -!!$ real(psb_dpk_) :: res -!!$ real(psb_dpk_), external :: ddot -!!$ -!!$ if (x%is_dev()) call x%sync() -!!$ res = ddot(n, y, 1, x%v, 1) -!!$ -!!$ end function d_oacc_dot_a -!!$ -!!$ ! subroutine d_oacc_set_vect(x,y) -!!$ ! implicit none -!!$ ! class(psb_d_vect_oacc), intent(inout) :: x -!!$ ! real(psb_dpk_), intent(in) :: y(:) -!!$ ! integer(psb_ipk_) :: info -!!$ -!!$ ! if (size(x%v) /= size(y)) then -!!$ ! call x%free(info) -!!$ ! call x%all(size(y),info) -!!$ ! end if -!!$ ! x%v(:) = y(:) -!!$ ! call x%set_host() -!!$ ! end subroutine d_oacc_set_vect -!!$ -!!$ subroutine d_oacc_to_dev(v) -!!$ implicit none -!!$ real(psb_dpk_) :: v(:) -!!$ !$acc update device(v) -!!$ end subroutine d_oacc_to_dev -!!$ -!!$ subroutine d_oacc_to_host(v) -!!$ implicit none -!!$ real(psb_dpk_) :: v(:) -!!$ !$acc update self(v) -!!$ end subroutine d_oacc_to_host -!!$ + + + subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_d_vect_oacc) + if ((beta /= dzero) .and. y%is_host()) call y%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * xx%v(i) + beta * y%v(i) + end do + end if + call y%set_dev() + class default + if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + end subroutine d_oacc_axpby_v + + subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent(in) :: x(:) + class(psb_d_vect_oacc), intent(inout) :: y + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine d_oacc_axpby_a + + subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. + + select type(xx => x) + class is (psb_d_vect_oacc) + select type(yy => y) + class is (psb_d_vect_oacc) + select type(zz => z) + class is (psb_d_vect_oacc) + if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine d_oacc_abgdxyz + + subroutine d_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine d_oacc_sctb_buf + + subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine d_oacc_sctb_x + + + + subroutine d_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_dpk_) :: beta, x(:) + class(psb_d_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine d_oacc_sctb + + + subroutine d_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_d_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine d_oacc_gthzbuf + + subroutine d_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + real(psb_dpk_) :: y(:) + class(psb_d_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'd_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine d_oacc_gthzv_x + + subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_d_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space(info) + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space(info) + end select + select type(vval => val) + type is (psb_d_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine d_oacc_ins_v + + + + subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_ins_a + + + + subroutine d_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_bld_mn + + + subroutine d_oacc_bld_x(x, this) + use psb_base_mod + implicit none + real(psb_dpk_), intent(in) :: this(:) + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine d_oacc_bld_x + + + subroutine d_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_d_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_d_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine d_oacc_asb_m + + + + subroutine d_oacc_set_scal(x, val, first, last) + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine d_oacc_set_scal + + + + subroutine d_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(dzero) + end subroutine d_oacc_zero + + function d_oacc_get_nrows(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function d_oacc_get_nrows + + function d_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "dOACC" + + end function d_oacc_get_fmt + + function d_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + res = dzero + + select type(yy => y) + type is (psb_d_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n, x%v, 1, yy%v, 1) + type is (psb_d_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + + !$acc parallel loop reduction(+:res) present(x%v, yy%v) + do i = 1, n + res = res + x%v(i) * yy%v(i) + end do + !$acc end parallel loop + + class default + call x%sync() + res = y%dot(n, x%v) + end select + + end function d_oacc_vect_dot + + + + + function d_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + real(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n, y, 1, x%v, 1) + + end function d_oacc_dot_a + + ! subroutine d_oacc_set_vect(x,y) + ! implicit none + ! class(psb_d_vect_oacc), intent(inout) :: x + ! real(psb_dpk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine d_oacc_set_vect + + subroutine d_oacc_to_dev(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc update device(v) + end subroutine d_oacc_to_dev + + subroutine d_oacc_to_host(v) + implicit none + real(psb_dpk_) :: v(:) + !$acc update self(v) + end subroutine d_oacc_to_host + subroutine d_oacc_sync_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -828,21 +849,21 @@ contains subroutine d_oacc_set_host(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - + x%state = is_host end subroutine d_oacc_set_host subroutine d_oacc_set_dev(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - + x%state = is_dev end subroutine d_oacc_set_dev subroutine d_oacc_set_sync(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - + x%state = is_sync end subroutine d_oacc_set_sync @@ -869,50 +890,50 @@ contains res = (x%state == is_sync) end function d_oacc_is_sync -!!$ -!!$ subroutine d_oacc_vect_all(n, x, info) -!!$ use psi_serial_mod -!!$ use psb_realloc_mod -!!$ implicit none -!!$ integer(psb_ipk_), intent(in) :: n -!!$ class(psb_d_vect_oacc), intent(out) :: x -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ call psb_realloc(n, x%v, info) -!!$ if (info == 0) then -!!$ call x%set_host() -!!$ !$acc enter data create(x%v) -!!$ call x%sync_space() -!!$ end if -!!$ if (info /= 0) then -!!$ info = psb_err_alloc_request_ -!!$ call psb_errpush(info, 'd_oacc_all', & -!!$ i_err=(/n, n, n, n, n/)) -!!$ end if -!!$ end subroutine d_oacc_vect_all -!!$ -!!$ -!!$ subroutine d_oacc_vect_free(x, info) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_), intent(out) :: info -!!$ info = 0 -!!$ if (allocated(x%v)) then -!!$ !$acc exit data delete(x%v) finalize -!!$ deallocate(x%v, stat=info) -!!$ end if -!!$ -!!$ end subroutine d_oacc_vect_free -!!$ -!!$ function oacc_get_size(x) result(res) -!!$ implicit none -!!$ class(psb_d_vect_oacc), intent(inout) :: x -!!$ integer(psb_ipk_) :: res -!!$ -!!$ if (x%is_dev()) call x%sync() -!!$ res = size(x%v) -!!$ end function oacc_get_size -!!$ + + subroutine d_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_d_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'd_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine d_oacc_vect_all + + + subroutine d_oacc_vect_free(x, info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine d_oacc_vect_free + + function oacc_get_size(x) result(res) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function oacc_get_size + !!$ !!$ subroutine initialize(N) !!$ integer(psb_ipk_) :: N @@ -939,26 +960,6 @@ contains !!$ subroutine to_host() !!$ !$acc update self(v1,v2) !!$ end subroutine to_host -!!$ function d_dot(N) result(res) -!!$ real(kind(1.d0)) :: res -!!$ integer(psb_ipk_) :: i,N -!!$ real(kind(1.d0)) :: t1,t2,t3 -!!$ res = 0.0d0 -!!$ !$acc parallel -!!$ !$acc loop reduction(+:res) -!!$ do i=1,N -!!$ res = res + v1(i) * v2(i) -!!$ end do -!!$ !$acc end parallel -!!$ -!!$ end function d_dot -!!$ function h_dot(N) result(res) -!!$ integer(psb_ipk_) :: i,N -!!$ real(kind(1.d0)) :: t1,t2,t3,res -!!$ res = 0.0d0 -!!$ do i=1,N -!!$ res = res + v1(i) * v2(i) -!!$ end do -!!$ end function h_dot !!$ + end module psb_d_oacc_vect_mod From a81d1d9b68481161c0102dcce390cf514978c063 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 13:45:16 +0200 Subject: [PATCH 07/39] Now oacc_vect compiles cleanly --- openacc/Makefile | 4 +- openacc/impl/psb_d_oacc_mlt_v.f90 | 36 ++++++++--------- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 62 ++++++++++++++--------------- 3 files changed, 50 insertions(+), 52 deletions(-) diff --git a/openacc/Makefile b/openacc/Makefile index c82a9281..aa6ea23d 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -20,7 +20,7 @@ CINCLUDES= # Source files FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o + psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ psb_oacc_env_mod.o # Library name @@ -43,7 +43,7 @@ ilib: $(OBJS) $(MAKE) -C impl lib psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_d_oacc_csr_mat_mod.o + psb_d_oacc_csr_mat_mod.o psb_oacc_env_mod.o clean: cclean iclean /bin/rm -f $(FOBJS) *$(.mod) *.a diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 index a4eb6660..bedd0247 100644 --- a/openacc/impl/psb_d_oacc_mlt_v.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -10,22 +10,22 @@ subroutine d_oacc_mlt_v(x, y, info) integer(psb_ipk_) :: i, n info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_d_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_d_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select end subroutine d_oacc_mlt_v diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index b59b4f56..7e46495f 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -19,37 +19,35 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) info = 0 -!!$ select type(xx => x) -!!$ class is (psb_d_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ if (xx%is_host()) call xx%sync_space() -!!$ if (yy%is_host()) call yy%sync_space() -!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync_space() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) + class is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero) .and. (z%is_host())) call z%sync() + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_host() + end select + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + do i = 1, n + z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) + end do + call z%set_host() + end select end subroutine d_oacc_mlt_v_2 From c7bbfb8b680b8bfc25e3448e03de4d2ab57e8d0a Mon Sep 17 00:00:00 2001 From: tloloum Date: Wed, 10 Jul 2024 10:06:27 +0200 Subject: [PATCH 08/39] new Makefile, test compile well + data vect --- test/openacc/Makefile | 20 ++--- test/openacc/datavect.F90 | 84 ++++++++++++++++++ test/openacc/vectoacc.F90 | 180 ++++++++++++++++++++------------------ 3 files changed, 187 insertions(+), 97 deletions(-) create mode 100644 test/openacc/datavect.F90 diff --git a/test/openacc/Makefile b/test/openacc/Makefile index 6df14d42..65bed1ad 100644 --- a/test/openacc/Makefile +++ b/test/openacc/Makefile @@ -1,7 +1,6 @@ TOPDIR=../.. include $(TOPDIR)/Make.inc -# Directories LIBDIR=$(TOPDIR)/lib/ PSBLIBDIR=$(TOPDIR)/lib/ PSBINCDIR=$(TOPDIR)/include @@ -10,36 +9,30 @@ INCDIR=$(TOPDIR)/include MODDIR=$(TOPDIR)/modules EXEDIR=./runs -# Libraries -PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lpsb_openacc -lopenblas -lmetis +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lopenblas -lmetis LDLIBS=$(PSBGPULDLIBS) -# Includes FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) -# Compiler flags FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70" CFLAGS=-O0 -march=native -# Source files -SRCS=test.F90 vectoacc.F90 +SRCS=test.F90 vectoacc.F90 datavect.F90 CSRC=timers.c -# Object files OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o) -# Default rule -all: dir +all: dir $(OBJS) + $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) + /bin/mv datavect $(EXEDIR) dir: @if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi -# Pattern rule for creating executables %: %.o timers.o $(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) /bin/mv $@ $(EXEDIR) -# Compilation rules %.o: %.F90 $(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@ @@ -47,7 +40,6 @@ dir: $(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@ clean: - /bin/rm -fr *.o *.mod $(EXEDIR)/* + /bin/rm -fr *.o *.mod $(EXEDIR)/* -# Phony targets .PHONY: all dir clean diff --git a/test/openacc/datavect.F90 b/test/openacc/datavect.F90 new file mode 100644 index 00000000..b78a65d9 --- /dev/null +++ b/test/openacc/datavect.F90 @@ -0,0 +1,84 @@ +program datavect + use psb_base_mod + use psb_oacc_mod + implicit none + + type(psb_d_vect_oacc) :: v3, v4, v5 + integer(psb_ipk_) :: info, n, i, old_percentage, percentage + real(psb_dpk_) :: alpha, dot_dev, dot_host, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev + double precision, external :: etime + double precision :: time_start, time_end + integer, parameter :: min_size = 1000, max_size = 100000000, step_size = 1000000 + integer, parameter :: ntests = 80, ngpu = 20 + integer :: size + character(len=20) :: filename + + open(unit=10, file='performance_data.csv', status='unknown') + write(10, '(A, A, A, A, A)') 'Size,Alloc_Host,Alloc_Dev,Calc_Host,Calc_Dev' + + write(*, *) 'Test of the vector operations with OpenACC' + + alpha = 2.0 + old_percentage = 0 + + do size = min_size, max_size, step_size + n = size + percentage = int(real(size - min_size) / real(max_size - min_size) * 100.0) + if (percentage /= old_percentage) then + write(*, '(A,I3,A)', advance='no') 'Progress: ', percentage, '%' + write(*,'(A)', advance='no') char(13) + old_percentage = percentage + end if + + time_start = etime() + call v3%all(n, info) + call v4%all(n, info) + call v5%all(n, info) + time_end = etime() + t_alloc_host = (time_end - time_start) + + do i = 1, n + v3%v(i) = real(i, psb_dpk_) + v4%v(i) = real(n - i, psb_dpk_) + end do + + call v3%scal(alpha) + + call v3%set_host() + call v4%set_host() + + time_start = etime() + do i = 1, ntests + dot_host = sum(v3%v * v4%v) + end do + time_end = etime() + t_calc_host = (time_end - time_start) / real(ntests) + + time_start = etime() + call v3%set_dev() + call v4%set_dev() + call v3%sync_space() + call v4%sync_space() + time_end = etime() + t_alloc_dev = (time_end - time_start) + + time_start = etime() + do i = 1, ntests + dot_dev = v3%dot_v(n, v4) + end do + !$acc wait + time_end = etime() + t_calc_dev = (time_end - time_start) / real(ntests) + + write(10, '(I10, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5)') size, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev + + call v3%free(info) + call v4%free(info) + call v5%free(info) + end do + + close(10) + write(*, *) 'Performance data written to performance_data.csv' + + +end program datavect diff --git a/test/openacc/vectoacc.F90 b/test/openacc/vectoacc.F90 index b50fa317..639a2a67 100644 --- a/test/openacc/vectoacc.F90 +++ b/test/openacc/vectoacc.F90 @@ -1,85 +1,99 @@ program vectoacc - use psb_base_mod - use psb_oacc_mod - implicit none - - type(psb_d_vect_oacc) :: v3, v4, v5 - integer(psb_ipk_) :: info, n, i - real(psb_dpk_) :: alpha, beta, result - double precision, external :: etime - - real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev - double precision :: time_start, time_end - integer(psb_ipk_), parameter :: ntests=80, ngpu=20 - - write(*, *) 'Test of the vector operations with OpenACC' - - write(*, *) 'Enter the size of the vectors' - read(*, *) n - alpha = 2.0 - beta = 0.5 - - call v3%all(n, info) - call v4%all(n, info) - call v5%all(n, info) - - do i = 1, n - v3%v(i) = real(i, psb_dpk_) - v4%v(i) = real(n - i, psb_dpk_) - end do - - call v3%set_dev() - call v4%set_dev() - - call v3%scal(alpha) - call v3%sync() - - do i = 1, n - if (v3%v(i) /= alpha * real(i, psb_dpk_)) then - write(*, *) 'Scal error : index', i - end if - end do - write(*, *) 'Scal test passed' - - result = v3%dot_v(n, v4) - call v3%sync() - call v4%sync() - if (result /= sum(v3%v * v4%v)) then - write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result - end if - write(*, *) 'Dot_v test passed' - - result = v3%nrm2(n) - call v3%sync() - if (result /= sqrt(sum(v3%v ** 2))) then - write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result - end if - write(*, *) 'nrm2 test passed' - - call v3%set_host() - call v4%set_host() - - time_start = etime() - do i = 1, ntests - dot_host = sum(v3%v * v4%v) - end do - time_end = etime() - t_host = (time_end - time_start) / real(ntests) - write(*, *) 'Performance host: ', t_host, ' sec' - - call v3%set_dev() - call v4%set_dev() - time_start = etime() - do i = 1, ntests - dot_dev = v3%dot_v(n, v4) - end do - !$acc wait - time_end = etime() - t_dev = (time_end - time_start) / real(ntests) - write(*, *) 'Performance device: ', t_dev, ' sec' - - call v3%free(info) - call v4%free(info) - call v5%free(info) - + use psb_base_mod + use psb_oacc_mod + implicit none + + type(psb_d_vect_oacc) :: v3, v4, v5 + integer(psb_ipk_) :: info, n, i + real(psb_dpk_) :: alpha, beta, result + double precision, external :: etime + + real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev + double precision :: time_start, time_end + integer(psb_ipk_), parameter :: ntests=80, ngpu=20 + + write(*, *) 'Test of the vector operations with OpenACC' + + write(*, *) 'Enter the size of the vectors' + read(*, *) n + alpha = 2.0 + beta = 0.5 + + time_start = etime() + call v3%all(n, info) + call v4%all(n, info) + call v5%all(n, info) + time_end = etime() + t_alloc_host = time_end - time_start + write(*, *) 'Allocation time on host: ', t_alloc_host, ' sec' + + do i = 1, n + v3%v(i) = real(i, psb_dpk_) + v4%v(i) = real(n - i, psb_dpk_) + end do + + call v3%set_dev() + call v4%set_dev() + + call v3%scal(alpha) + call v3%sync() + + do i = 1, n + if (v3%v(i) /= alpha * real(i, psb_dpk_)) then + write(*, *) 'Scal error : index', i + end if + end do + write(*, *) 'Scal test passed' + + result = v3%dot_v(n, v4) + call v3%sync() + call v4%sync() + if (result /= sum(v3%v * v4%v)) then + write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result + end if + write(*, *) 'Dot_v test passed' + + result = v3%nrm2(n) + call v3%sync() + if (result /= sqrt(sum(v3%v ** 2))) then + write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result + end if + write(*, *) 'nrm2 test passed' + + call v3%set_host() + call v4%set_host() + + time_start = etime() + do i = 1, ntests + dot_host = sum(v3%v * v4%v) + end do + time_end = etime() + t_calc_host = (time_end - time_start) / real(ntests) + write(*, *) 'Host calculation time: ', t_calc_host, ' sec' + + call v3%set_dev() + call v4%set_dev() + + time_start = etime() + call v3%sync_space() + call v4%sync_space() + time_end = etime() + t_alloc_dev = time_end - time_start + write(*, *) 'Allocation time on device: ', t_alloc_dev, ' sec' + + time_start = etime() + do i = 1, ntests + dot_dev = v3%dot_v(n, v4) + end do + !$acc wait + time_end = etime() + t_calc_dev = (time_end - time_start) / real(ntests) + write(*, *) 'Device calculation time: ', t_calc_dev, ' sec' + + + call v3%free(info) + call v4%free(info) + call v5%free(info) + end program vectoacc + \ No newline at end of file From 2b5f09ddf9f746267c2e88530bbb708b89a532dd Mon Sep 17 00:00:00 2001 From: tloloum Date: Fri, 12 Jul 2024 09:26:06 +0200 Subject: [PATCH 09/39] all methods implementations for psb_d_oacc_csr_sparse_mat --- openacc/Makefile | 10 +- openacc/impl/Makefile | 8 +- openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 34 +++ openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 25 +++ openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 23 +++ openacc/impl/psb_d_oacc_csr_csmm.F90 | 86 ++++++++ openacc/impl/psb_d_oacc_csr_csmv.F90 | 81 ++++++++ openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 84 ++++++++ openacc/impl/psb_d_oacc_csr_mold.F90 | 33 +++ openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 24 +++ openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 23 +++ openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 27 +++ openacc/impl/psb_d_oacc_csr_scal.F90 | 52 +++++ openacc/impl/psb_d_oacc_csr_scals.F90 | 32 +++ openacc/psb_d_oacc_csr_mat_mod.F90 | 194 +++++++++++++++++- 15 files changed, 719 insertions(+), 17 deletions(-) create mode 100644 openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_csmm.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_csmv.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_mold.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_scal.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_scals.F90 diff --git a/openacc/Makefile b/openacc/Makefile index aa6ea23d..d1311fe2 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -1,34 +1,32 @@ include ../Make.inc -# Compilers and flags + #CC=mpicc #FC=mpif90 #FCOPT=-O0 -march=native #OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" -# Directories LIBDIR=../lib INCDIR=../include MODDIR=../modules IMPLDIR=./impl -# Include and library paths INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). CINCLUDES= #LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis -# Source files + FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ psb_oacc_env_mod.o -# Library name + LIBNAME=libpsb_openacc.a OBJS=$(COBJS) $(FOBJS) -# Rules + lib: objs ilib ar cur $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 7bab6654..ff0e8251 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -9,7 +9,13 @@ MODDIR=../../modules FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. LIBNAME=libpsb_openacc.a -OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o +OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ + psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ + psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ + psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ + psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo \ + psb_d_oacc_csr_mv_from_fmt psb_d_oacc_csr_mold.o \ + psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..a0b8abe0 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,34 @@ +subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m, n, nz) + + if (.not.allocated(a%val)) then + allocate(a%val(nz)) + allocate(a%ja(nz)) + allocate(a%irp(m+1)) + end if + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_oacc_csr_allocate_mnnz + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..c84c5876 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,25 @@ +subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_coo + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + + call a%set_dev() + if (info /= 0) goto 9999 + + return + + 9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_csr_cp_from_coo + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..2eae41d7 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,23 @@ +subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_fmt + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + +end subroutine psb_d_oacc_csr_cp_from_fmt + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90 new file mode 100644 index 00000000..e7224764 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_csmm.F90 @@ -0,0 +1,86 @@ +subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n,k, nxy + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = a%irp(i), a%irp(i+1) - 1 + y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_csmm + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90 new file mode 100644 index 00000000..c3291acf --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_csmv.F90 @@ -0,0 +1,81 @@ +subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, n + do j = a%irp(i), a%irp(i+1) - 1 + y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_csmv + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 new file mode 100644 index 00000000..c0461c82 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 @@ -0,0 +1,84 @@ +subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv + use psb_d_oacc_vect_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_csr_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_oacc) + select type(yy => y) + type is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val) + yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i) + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'csrg_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + end subroutine psb_d_oacc_csr_inner_vect_sv + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90 new file mode 100644 index 00000000..08598cb6 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mold.F90 @@ -0,0 +1,33 @@ +subroutine psb_d_oacc_csr_mold(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mold + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='csr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + + 9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_csr_mold + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..5f4a6b41 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,24 @@ +subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_coo + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irp) + + return + + 9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_oacc_csr_mv_from_coo + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..16a4636b --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,23 @@ +subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_fmt + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + +end subroutine psb_d_oacc_csr_mv_from_fmt + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..7a6723d0 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,27 @@ +subroutine psb_d_oacc_csr_reallocate_nz(nz, a) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_reallocate_nz + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90 new file mode 100644 index 00000000..b2bf12e8 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_scal.F90 @@ -0,0 +1,52 @@ +subroutine psb_d_oacc_csr_scal(d, a, info, side) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scal + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop present(a, d) + do i = 1, a%get_nrows() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop present(a, d) + do i = 1, a%get_ncols() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop present(a, d) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d(i) + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_scal + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90 new file mode 100644 index 00000000..8c77f647 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_scals.F90 @@ -0,0 +1,32 @@ +subroutine psb_d_oacc_csr_scals(d, a, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scals + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_oacc_csr_scals diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index d9c8a2a6..b8eb1be9 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -12,18 +12,69 @@ module psb_d_oacc_csr_mat_mod type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, pass(a) :: all => d_oacc_csr_all - procedure, pass(a) :: is_host => d_oacc_csr_is_host - procedure, pass(a) :: is_sync => d_oacc_csr_is_sync - procedure, pass(a) :: is_dev => d_oacc_csr_is_dev - procedure, pass(a) :: set_host => d_oacc_csr_set_host - procedure, pass(a) :: set_sync => d_oacc_csr_set_sync - procedure, pass(a) :: set_dev => d_oacc_csr_set_dev - procedure, pass(a) :: sync_space => d_oacc_csr_sync_space - procedure, pass(a) :: sync => d_oacc_csr_sync - procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, nopass :: get_fmt => d_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => d_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv + procedure, pass(a) :: scals => psb_d_oacc_csr_scals + procedure, pass(a) :: scalv => psb_d_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => d_oacc_csr_free + procedure, pass(a) :: mold => psb_d_oacc_csr_mold + procedure, pass(a) :: all => d_oacc_csr_all + procedure, pass(a) :: is_host => d_oacc_csr_is_host + procedure, pass(a) :: is_sync => d_oacc_csr_is_sync + procedure, pass(a) :: is_dev => d_oacc_csr_is_dev + procedure, pass(a) :: set_host => d_oacc_csr_set_host + procedure, pass(a) :: set_sync => d_oacc_csr_set_sync + procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: sync_space => d_oacc_csr_sync_space + procedure, pass(a) :: sync => d_oacc_csr_sync end type psb_d_oacc_csr_sparse_mat + interface + subroutine psb_d_oacc_csr_mold(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mold + end interface + + interface + subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_fmt + end interface + + interface + subroutine psb_d_oacc_csr_mv_from_coo(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_coo + end interface + + interface + subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_fmt + end interface + interface subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ @@ -35,7 +86,130 @@ module psb_d_oacc_csr_mat_mod end subroutine psb_d_oacc_csr_vect_mv end interface + interface + subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_inner_vect_sv + end interface + + interface + subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmm + end interface + + interface + subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmv + end interface + + interface + subroutine psb_d_oacc_csr_scals(d, a, info) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_scals + end interface + + interface + subroutine psb_d_oacc_csr_scal(d,a,info,side) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_csr_scal + end interface + + interface + subroutine psb_d_oacc_csr_reallocate_nz(nz,a) + import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_csr_reallocate_nz + end interface + + interface + subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz) + import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_csr_allocate_mnnz + end interface + + interface + subroutine psb_d_oacc_csr_cp_from_coo(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_coo + end interface + contains + + + subroutine d_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_oacc_csr_free + + + + function d_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_oacc_csr_sizeof + + + function d_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function d_oacc_csr_get_fmt subroutine d_oacc_csr_all(m, n, nz, a, info) implicit none From 9e8294066d885c3fd7fa5e846b476b29c7e25418 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 12 Jul 2024 13:01:22 +0200 Subject: [PATCH 10/39] Fix Makefile --- openacc/impl/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index ff0e8251..64879eb6 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -13,8 +13,8 @@ OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ - psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo \ - psb_d_oacc_csr_mv_from_fmt psb_d_oacc_csr_mold.o \ + psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \ + psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o \ psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o objs: $(OBJS) From 0707cc0a72fcb6d3282f8d3e353e21887eb46494 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 12 Jul 2024 13:01:32 +0200 Subject: [PATCH 11/39] Take out reference to elldev_mod --- openacc/impl/psb_d_oacc_csr_csmm.F90 | 6 +++--- openacc/impl/psb_d_oacc_csr_csmv.F90 | 6 +++--- openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90 index e7224764..4b404ad1 100644 --- a/openacc/impl/psb_d_oacc_csr_csmm.F90 +++ b/openacc/impl/psb_d_oacc_csr_csmm.F90 @@ -1,7 +1,7 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) use psb_base_mod - use elldev_mod - use psb_vectordev_mod + !use elldev_mod + !use psb_vectordev_mod use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a @@ -83,4 +83,4 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) return end subroutine psb_d_oacc_csr_csmm - \ No newline at end of file + diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90 index c3291acf..65acc8e4 100644 --- a/openacc/impl/psb_d_oacc_csr_csmv.F90 +++ b/openacc/impl/psb_d_oacc_csr_csmv.F90 @@ -1,7 +1,7 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) use psb_base_mod - use elldev_mod - use psb_vectordev_mod + !use elldev_mod + !use psb_vectordev_mod use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a @@ -78,4 +78,4 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) return end subroutine psb_d_oacc_csr_csmv - \ No newline at end of file + diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 index c0461c82..a15a710e 100644 --- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 @@ -1,7 +1,7 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) use psb_base_mod - use elldev_mod - use psb_vectordev_mod + ! use elldev_mod + ! use psb_vectordev_mod use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv use psb_d_oacc_vect_mod implicit none @@ -81,4 +81,4 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) 9999 call psb_error_handler(err_act) return end subroutine psb_d_oacc_csr_inner_vect_sv - \ No newline at end of file + From 7006665d823f9ca3efcbd929e933fe0cc956adc0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 12 Jul 2024 16:34:48 +0200 Subject: [PATCH 12/39] Introduce submodules, adjust Makefile --- openacc/impl/Makefile | 12 + openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 27 +- openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 25 +- openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 21 +- openacc/impl/psb_d_oacc_csr_csmm.F90 | 40 +- openacc/impl/psb_d_oacc_csr_csmv.F90 | 40 +- openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 33 +- openacc/impl/psb_d_oacc_csr_mold.F90 | 24 +- openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 27 +- openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 21 +- openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 23 +- openacc/impl/psb_d_oacc_csr_scal.F90 | 27 +- openacc/impl/psb_d_oacc_csr_scals.F90 | 46 +- openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 66 +- openacc/impl/psb_d_oacc_mlt_v.f90 | 57 +- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 84 +-- openacc/psb_d_oacc_csr_mat_mod.F90 | 689 +++++++++--------- openacc/psb_d_oacc_vect_mod.F90 | 14 +- 18 files changed, 645 insertions(+), 631 deletions(-) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 64879eb6..56df9402 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -2,6 +2,7 @@ include ../../Make.inc LIBDIR=../../lib INCDIR=../../include MODDIR=../../modules +UP=.. # # Compilers and such # @@ -22,6 +23,17 @@ objs: $(OBJS) lib: objs ar cur ../$(LIBNAME) $(OBJS) + +psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ + psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ + psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ + psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ + psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \ + psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o + +psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o + + clean: /bin/rm -f $(OBJS) diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 index a0b8abe0..5281edee 100644 --- a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_allocate_mnnz +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) implicit none integer(psb_ipk_), intent(in) :: m, n class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a @@ -9,26 +10,26 @@ subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) integer(psb_ipk_) :: err_act, nz_ character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. - + call psb_erractionsave(err_act) info = psb_success_ - + call a%psb_d_csr_sparse_mat%allocate(m, n, nz) - + if (.not.allocated(a%val)) then allocate(a%val(nz)) allocate(a%ja(nz)) allocate(a%irp(m+1)) end if - + call a%set_dev() if (info /= 0) goto 9999 - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - -end subroutine psb_d_oacc_csr_allocate_mnnz - \ No newline at end of file + + end subroutine psb_d_oacc_csr_allocate_mnnz +end submodule psb_d_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 index c84c5876..ec92f618 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -1,25 +1,26 @@ -subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_coo +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - + call a%set_dev() if (info /= 0) goto 9999 - + return - - 9999 continue + +9999 continue info = psb_err_alloc_dealloc_ return - + end subroutine psb_d_oacc_csr_cp_from_coo - \ No newline at end of file +end submodule psb_d_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 index 2eae41d7..37541ea9 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -1,23 +1,24 @@ -subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_fmt +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + select type(b) type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - + !$acc update device(a%val, a%ja, a%irp) end select - -end subroutine psb_d_oacc_csr_cp_from_fmt - \ No newline at end of file + + end subroutine psb_d_oacc_csr_cp_from_fmt +end submodule psb_d_oacc_csr_cp_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90 index 4b404ad1..6ab87175 100644 --- a/openacc/impl/psb_d_oacc_csr_csmm.F90 +++ b/openacc/impl/psb_d_oacc_csr_csmm.F90 @@ -1,8 +1,7 @@ -subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - use psb_base_mod - !use elldev_mod - !use psb_vectordev_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmm_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta @@ -10,30 +9,30 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - + character :: trans_ integer(psb_ipk_) :: i, j, m, n,k, nxy logical :: tra integer(psb_ipk_) :: err_act character(len=20) :: name = 'd_oacc_csmm' logical, parameter :: debug = .false. - + info = psb_success_ call psb_erractionsave(err_act) - + if (present(trans)) then trans_ = trans else trans_ = 'N' end if - + if (.not.a%is_asb()) then info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - + if (tra) then m = a%get_ncols() n = a%get_nrows() @@ -41,31 +40,31 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) n = a%get_ncols() m = a%get_nrows() end if - + if (size(x,1) < n) then info = 36 call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) goto 9999 end if - + if (size(y,1) < m) then info = 36 call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) goto 9999 end if - + if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) else nxy = min(size(x,2), size(y,2)) - + !$acc parallel loop collapse(2) present(a, x, y) do j = 1, nxy do i = 1, m y(i,j) = beta * y(i,j) end do end do - + !$acc parallel loop collapse(2) present(a, x, y) do j = 1, nxy do i = 1, n @@ -75,12 +74,13 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) end do end do endif - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_csmm - +end submodule psb_d_oacc_csr_csmm_impl + diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90 index 65acc8e4..205a7a59 100644 --- a/openacc/impl/psb_d_oacc_csr_csmv.F90 +++ b/openacc/impl/psb_d_oacc_csr_csmv.F90 @@ -1,8 +1,7 @@ -subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - use psb_base_mod - !use elldev_mod - !use psb_vectordev_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta @@ -10,31 +9,31 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - + character :: trans_ integer(psb_ipk_) :: i, j, m, n logical :: tra integer(psb_ipk_) :: err_act character(len=20) :: name = 'd_oacc_csmv' logical, parameter :: debug = .false. - + call psb_erractionsave(err_act) info = psb_success_ - + if (present(trans)) then trans_ = trans else trans_ = 'N' end if - + if (.not.a%is_asb()) then info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif - + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - + if (tra) then m = a%get_ncols() n = a%get_nrows() @@ -42,19 +41,19 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) n = a%get_ncols() m = a%get_nrows() end if - + if (size(x,1) < n) then info = 36 call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) goto 9999 end if - + if (size(y,1) < m) then info = 36 call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) goto 9999 end if - + if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) else @@ -62,7 +61,7 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) do i = 1, m y(i) = beta * y(i) end do - + !$acc parallel loop present(a, x, y) do i = 1, n do j = a%irp(i), a%irp(i+1) - 1 @@ -70,12 +69,13 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) end do end do endif - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_csmv - +end submodule psb_d_oacc_csr_csmv_impl + diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 index a15a710e..5f0ef4a0 100644 --- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 @@ -1,16 +1,14 @@ -subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) - use psb_base_mod - ! use elldev_mod - ! use psb_vectordev_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv - use psb_d_oacc_vect_mod +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - + real(psb_dpk_), allocatable :: rx(:), ry(:) logical :: tra character :: trans_ @@ -18,24 +16,24 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) character(len=20) :: name = 'd_oacc_csr_inner_vect_sv' logical, parameter :: debug = .false. integer(psb_ipk_) :: i - + call psb_get_erraction(err_act) info = psb_success_ - + if (present(trans)) then trans_ = trans else trans_ = 'N' end if - + if (.not.a%is_asb()) then info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif - + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - + if (tra .or. (beta /= dzero)) then call x%sync() call y%sync() @@ -68,17 +66,18 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) call y%bld(ry) end select endif - + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info, name, a_err = 'csrg_vect_sv') goto 9999 endif - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return end subroutine psb_d_oacc_csr_inner_vect_sv - +end submodule psb_d_oacc_csr_inner_vect_sv_impl + diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90 index 08598cb6..dc9ff711 100644 --- a/openacc/impl/psb_d_oacc_csr_mold.F90 +++ b/openacc/impl/psb_d_oacc_csr_mold.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_mold(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mold +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mold_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mold(a, b, info) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b @@ -8,26 +9,27 @@ subroutine psb_d_oacc_csr_mold(a, b, info) integer(psb_ipk_) :: err_act character(len=20) :: name='csr_mold' logical, parameter :: debug=.false. - + call psb_get_erraction(err_act) - + info = 0 if (allocated(b)) then call b%free() deallocate(b, stat=info) end if if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info) - + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) goto 9999 end if return - - 9999 call psb_error_handler(err_act) - + +9999 call psb_error_handler(err_act) + return - + end subroutine psb_d_oacc_csr_mold - \ No newline at end of file +end submodule psb_d_oacc_csr_mold_impl + diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 index 5f4a6b41..2ed9b032 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -1,24 +1,25 @@ -subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_coo +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - + !$acc update device(a%val, a%ja, a%irp) - + return - - 9999 continue + +9999 continue info = psb_err_alloc_dealloc_ return - -end subroutine psb_d_oacc_csr_mv_from_coo - \ No newline at end of file + + end subroutine psb_d_oacc_csr_mv_from_coo +end submodule psb_d_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 index 16a4636b..197ed911 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -1,23 +1,24 @@ -subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_fmt +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + select type(b) type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - + !$acc update device(a%val, a%ja, a%irp) end select - -end subroutine psb_d_oacc_csr_mv_from_fmt - \ No newline at end of file + + end subroutine psb_d_oacc_csr_mv_from_fmt +end submodule psb_d_oacc_csr_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 index 7a6723d0..c345f681 100644 --- a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 +++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_reallocate_nz(nz, a) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_reallocate_nz +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_reallocate_nz(nz, a) implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a @@ -8,20 +9,20 @@ subroutine psb_d_oacc_csr_reallocate_nz(nz, a) integer(psb_ipk_) :: err_act character(len=20) :: name='d_oacc_csr_reallocate_nz' logical, parameter :: debug=.false. - + call psb_erractionsave(err_act) info = psb_success_ - + call a%psb_d_csr_sparse_mat%reallocate(nz) - + call a%set_dev() if (info /= 0) goto 9999 - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_reallocate_nz - \ No newline at end of file +end submodule psb_d_oacc_csr_reallocate_nz_impl diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90 index b2bf12e8..cc693fa2 100644 --- a/openacc/impl/psb_d_oacc_csr_scal.F90 +++ b/openacc/impl/psb_d_oacc_csr_scal.F90 @@ -1,22 +1,23 @@ -subroutine psb_d_oacc_csr_scal(d, a, info, side) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scal +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scal_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_scal(d, a, info, side) implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - + integer(psb_ipk_) :: err_act character(len=20) :: name='scal' logical, parameter :: debug=.false. integer(psb_ipk_) :: i, j - + info = psb_success_ call psb_erractionsave(err_act) - + if (a%is_host()) call a%sync() - + if (present(side)) then if (side == 'L') then !$acc parallel loop present(a, d) @@ -39,14 +40,14 @@ subroutine psb_d_oacc_csr_scal(d, a, info, side) a%val(i) = a%val(i) * d(i) end do end if - + call a%set_dev() - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_scal - \ No newline at end of file +end submodule psb_d_oacc_csr_scal_impl diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90 index 8c77f647..157355d8 100644 --- a/openacc/impl/psb_d_oacc_csr_scals.F90 +++ b/openacc/impl/psb_d_oacc_csr_scals.F90 @@ -1,32 +1,34 @@ -subroutine psb_d_oacc_csr_scals(d, a, info) +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scals_impl use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scals - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info +contains + module subroutine psb_d_oacc_csr_scals(d, a, info) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: err_act - character(len=20) :: name='scal' - logical, parameter :: debug=.false. - integer(psb_ipk_) :: i + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i - info = psb_success_ - call psb_erractionsave(err_act) + info = psb_success_ + call psb_erractionsave(err_act) - if (a%is_host()) call a%sync() + if (a%is_host()) call a%sync() - !$acc parallel loop present(a) - do i = 1, size(a%val) - a%val(i) = a%val(i) * d - end do + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do - call a%set_dev() + call a%set_dev() - call psb_erractionrestore(err_act) - return + call psb_erractionrestore(err_act) + return 9999 call psb_error_handler(err_act) - return + return -end subroutine psb_d_oacc_csr_scals + end subroutine psb_d_oacc_csr_scals +end submodule psb_d_oacc_csr_scals_impl diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 index f0394591..1dca2ba2 100644 --- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_vect_mv +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) implicit none real(psb_dpk_), intent(in) :: alpha, beta @@ -16,9 +17,9 @@ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) n = a%get_ncols() if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) - info = psb_err_invalid_mat_state_ - return + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return end if if (a%is_host()) call a%sync() @@ -28,34 +29,35 @@ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) call y%set_dev() -contains + contains subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) - implicit none - integer(psb_ipk_) :: m, n - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_) :: val(:), x(:), y(:) - integer(psb_ipk_) :: ja(:), irp(:) - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, ii, isz - real(psb_dpk_) :: tmp - integer(psb_ipk_), parameter :: vsz = 256 - - info = 0 - - !$acc parallel loop vector_length(vsz) private(isz) - do ii = 1, m, vsz - isz = min(vsz, m - ii + 1) - !$acc loop independent private(tmp) - do i = ii, ii + isz - 1 - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = irp(i), irp(i + 1) - 1 - tmp = tmp + val(j) * x(ja(j)) - end do - y(i) = alpha * tmp + beta * y(i) - end do + implicit none + integer(psb_ipk_) :: m, n + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) end do + end do end subroutine inner_spmv -end subroutine psb_d_oacc_csr_vect_mv + end subroutine psb_d_oacc_csr_vect_mv +end submodule psb_d_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 index bedd0247..ab242b57 100644 --- a/openacc/impl/psb_d_oacc_mlt_v.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -1,31 +1,34 @@ +submodule (psb_d_oacc_vect_mod) psb_d_oacc_mlt_v_impl + use psb_string_mod +contains -subroutine d_oacc_mlt_v(x, y, info) - use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v + module subroutine psb_d_oacc_mlt_v(x, y, info) - implicit none - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n + integer(psb_ipk_) :: i, n - info = 0 - n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) - class is (psb_d_vect_oacc) - if (y%is_host()) call y%sync() - if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (y%is_dev()) call y%sync() - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - end select -end subroutine d_oacc_mlt_v + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_d_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select + end subroutine psb_d_oacc_mlt_v +end submodule psb_d_oacc_mlt_v_impl diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index 7e46495f..4ca2bdab 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -1,53 +1,55 @@ -subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2 +submodule (psb_d_oacc_vect_mod) d_oacc_mlt_v_2_impl use psb_string_mod - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - character(len=1), intent(in), optional :: conjgx, conjgy - integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ +contains + module subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ - conjgx_ = .false. - conjgy_ = .false. - if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') - if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') - n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) - info = 0 - select type(xx => x) - class is (psb_d_vect_oacc) - select type (yy => y) + info = 0 + select type(xx => x) class is (psb_d_vect_oacc) - if (xx%is_host()) call xx%sync() - if (yy%is_host()) call yy%sync() - if ((beta /= dzero) .and. (z%is_host())) call z%sync() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) - end do - call z%set_dev() + select type (yy => y) + class is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero) .and. (z%is_host())) call z%sync() + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_host() + end select class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if ((beta /= dzero) .and. (z%is_dev())) call z%sync() do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) end do call z%set_host() end select - class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync() - do i = 1, n - z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) - end do - call z%set_host() - end select -end subroutine d_oacc_mlt_v_2 + end subroutine d_oacc_mlt_v_2 +end submodule d_oacc_mlt_v_2_impl diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index b8eb1be9..ca4199a8 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -1,359 +1,346 @@ module psb_d_oacc_csr_mat_mod - use iso_c_binding - use psb_d_mat_mod - use psb_d_oacc_vect_mod - !use oaccsparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat - integer(psb_ipk_) :: devstate = is_host - contains - procedure, nopass :: get_fmt => d_oacc_csr_get_fmt - procedure, pass(a) :: sizeof => d_oacc_csr_sizeof - procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv - procedure, pass(a) :: scals => psb_d_oacc_csr_scals - procedure, pass(a) :: scalv => psb_d_oacc_csr_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt - procedure, pass(a) :: free => d_oacc_csr_free - procedure, pass(a) :: mold => psb_d_oacc_csr_mold - procedure, pass(a) :: all => d_oacc_csr_all - procedure, pass(a) :: is_host => d_oacc_csr_is_host - procedure, pass(a) :: is_sync => d_oacc_csr_is_sync - procedure, pass(a) :: is_dev => d_oacc_csr_is_dev - procedure, pass(a) :: set_host => d_oacc_csr_set_host - procedure, pass(a) :: set_sync => d_oacc_csr_set_sync - procedure, pass(a) :: set_dev => d_oacc_csr_set_dev - procedure, pass(a) :: sync_space => d_oacc_csr_sync_space - procedure, pass(a) :: sync => d_oacc_csr_sync - end type psb_d_oacc_csr_sparse_mat - - interface - subroutine psb_d_oacc_csr_mold(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_mold - end interface - - interface - subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_cp_from_fmt - end interface - - interface - subroutine psb_d_oacc_csr_mv_from_coo(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_mv_from_coo - end interface - - interface - subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_mv_from_fmt - end interface - - interface - subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_vect_mv - end interface - - interface - subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x,y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_inner_vect_sv - end interface - - interface - subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_csmm - end interface - - interface - subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_csmv - end interface - - interface - subroutine psb_d_oacc_csr_scals(d, a, info) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_scals - end interface - - interface - subroutine psb_d_oacc_csr_scal(d,a,info,side) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: side - end subroutine psb_d_oacc_csr_scal - end interface - - interface - subroutine psb_d_oacc_csr_reallocate_nz(nz,a) - import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: nz - end subroutine psb_d_oacc_csr_reallocate_nz - end interface - - interface - subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz) - import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: m,n - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_oacc_csr_allocate_mnnz - end interface - - interface - subroutine psb_d_oacc_csr_cp_from_coo(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_cp_from_coo - end interface + use iso_c_binding + use psb_d_mat_mod + use psb_d_oacc_vect_mod + !use oaccsparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => d_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv + procedure, pass(a) :: scals => psb_d_oacc_csr_scals + procedure, pass(a) :: scalv => psb_d_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => d_oacc_csr_free + procedure, pass(a) :: mold => psb_d_oacc_csr_mold + procedure, pass(a) :: all => d_oacc_csr_all + procedure, pass(a) :: is_host => d_oacc_csr_is_host + procedure, pass(a) :: is_sync => d_oacc_csr_is_sync + procedure, pass(a) :: is_dev => d_oacc_csr_is_dev + procedure, pass(a) :: set_host => d_oacc_csr_set_host + procedure, pass(a) :: set_sync => d_oacc_csr_set_sync + procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: sync_space => d_oacc_csr_sync_space + procedure, pass(a) :: sync => d_oacc_csr_sync + end type psb_d_oacc_csr_sparse_mat + + interface + module subroutine psb_d_oacc_csr_mold(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mold + end interface + + interface + module subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_csr_mv_from_coo(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmm + end interface + + interface + module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmv + end interface + + interface + module subroutine psb_d_oacc_csr_scals(d, a, info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_scals + end interface + + interface + module subroutine psb_d_oacc_csr_scal(d,a,info,side) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_csr_scal + end interface + + interface + module subroutine psb_d_oacc_csr_reallocate_nz(nz,a) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_csr_cp_from_coo(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_coo + end interface contains - - - subroutine d_oacc_csr_free(a) - use psb_base_mod - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: info - - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if - - call a%psb_d_csr_sparse_mat%free() - - return - end subroutine d_oacc_csr_free - - - function d_oacc_csr_sizeof(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - - end function d_oacc_csr_sizeof - - - function d_oacc_csr_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSR_oacc' - end function d_oacc_csr_get_fmt - - subroutine d_oacc_csr_all(m, n, nz, a, info) - implicit none - integer(psb_ipk_), intent(in) :: m, n, nz - class(psb_d_oacc_csr_sparse_mat), intent(out) :: a - integer(psb_ipk_), intent(out) :: info - - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if - - call a%set_nrows(m) - call a%set_ncols(n) - - allocate(a%val(nz),stat=info) - allocate(a%ja(nz),stat=info) - allocate(a%irp(m+1),stat=info) - if (info == 0) call a%set_host() - if (info == 0) call a%sync_space() - end subroutine d_oacc_csr_all - - function d_oacc_csr_is_host(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_oacc_csr_is_host - - function d_oacc_csr_is_sync(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_oacc_csr_is_sync - - function d_oacc_csr_is_dev(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_oacc_csr_is_dev - - subroutine d_oacc_csr_set_host(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_oacc_csr_set_host - - subroutine d_oacc_csr_set_sync(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_oacc_csr_set_sync - - subroutine d_oacc_csr_set_dev(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_oacc_csr_set_dev - - subroutine d_oacc_csr_sync_space(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - end subroutine d_oacc_csr_sync_space - - subroutine d_oacc_csr_sync(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a - class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (a%is_dev()) then - call d_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) - else if (a%is_host()) then - call d_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) - end if - call tmpa%set_sync() - end subroutine d_oacc_csr_sync - - subroutine d_oacc_csr_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_csr_to_dev - - subroutine d_oacc_csr_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - - + subroutine d_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_oacc_csr_free + + + + function d_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_oacc_csr_sizeof + + + function d_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function d_oacc_csr_get_fmt + + subroutine d_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_d_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(a%val)) then + !$acc exit data delete(a%val) finalize + deallocate(a%val, stat=info) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) finalize + deallocate(a%ja, stat=info) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) finalize + deallocate(a%irp, stat=info) + end if + + call a%set_nrows(m) + call a%set_ncols(n) + + allocate(a%val(nz),stat=info) + allocate(a%ja(nz),stat=info) + allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_space() + end subroutine d_oacc_csr_all + + function d_oacc_csr_is_host(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_csr_is_host + + function d_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_csr_is_sync + + function d_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_csr_is_dev + + subroutine d_oacc_csr_set_host(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_csr_set_host + + subroutine d_oacc_csr_set_sync(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_csr_set_sync + + subroutine d_oacc_csr_set_dev(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_csr_set_dev + + subroutine d_oacc_csr_sync_space(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irp)) then + call i_oacc_create_dev(a%irp) + end if + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine d_oacc_csr_sync_space + + subroutine d_oacc_csr_sync(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_csr_to_host(a%val) + call i_oacc_csr_to_host(a%ja) + call i_oacc_csr_to_host(a%irp) + else if (a%is_host()) then + call d_oacc_csr_to_dev(a%val) + call i_oacc_csr_to_dev(a%ja) + call i_oacc_csr_to_dev(a%irp) + end if + call tmpa%set_sync() + end subroutine d_oacc_csr_sync + + subroutine d_oacc_csr_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_csr_to_dev + + subroutine d_oacc_csr_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_csr_to_host + + subroutine i_oacc_csr_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_csr_to_dev + + subroutine i_oacc_csr_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_csr_to_host + + end module psb_d_oacc_csr_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 3139f9b8..3385f1ec 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -49,8 +49,8 @@ module psb_d_oacc_vect_mod procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz procedure, pass(y) :: mlt_a => d_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => d_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 procedure, pass(x) :: scal => d_oacc_scal procedure, pass(x) :: nrm2 => d_oacc_nrm2 procedure, pass(x) :: amax => d_oacc_amax @@ -63,19 +63,17 @@ module psb_d_oacc_vect_mod real(psb_dpk_), allocatable :: v1(:),v2(:),p(:) interface - subroutine d_oacc_mlt_v(x, y, info) - import + module subroutine psb_d_oacc_mlt_v(x, y, info) implicit none class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine d_oacc_mlt_v + end subroutine psb_d_oacc_mlt_v end interface interface - subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - import + module subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) implicit none real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x @@ -83,7 +81,7 @@ module psb_d_oacc_vect_mod class(psb_d_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine d_oacc_mlt_v_2 + end subroutine psb_d_oacc_mlt_v_2 end interface contains From b6fe0f3344727bfdad1c44d85d511d8796fcd70e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Jul 2024 08:52:43 +0200 Subject: [PATCH 13/39] New version for modules and methods --- openacc/Makefile | 28 +- openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 | 35 + openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 | 26 + openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_csr_csmm.F90 | 86 ++ openacc/impl/psb_c_oacc_csr_csmv.F90 | 81 ++ openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 | 83 ++ openacc/impl/psb_c_oacc_csr_mold.F90 | 35 + openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 | 25 + openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 | 28 + openacc/impl/psb_c_oacc_csr_scal.F90 | 53 + openacc/impl/psb_c_oacc_csr_scals.F90 | 34 + openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 63 ++ openacc/impl/psb_c_oacc_mlt_v.f90 | 31 + openacc/impl/psb_c_oacc_mlt_v_2.f90 | 98 ++ openacc/impl/psb_d_oacc_mlt_v.f90 | 57 +- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 125 ++- openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 | 35 + openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 | 26 + openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_csr_csmm.F90 | 86 ++ openacc/impl/psb_s_oacc_csr_csmv.F90 | 81 ++ openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 | 83 ++ openacc/impl/psb_s_oacc_csr_mold.F90 | 35 + openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 | 25 + openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 | 28 + openacc/impl/psb_s_oacc_csr_scal.F90 | 53 + openacc/impl/psb_s_oacc_csr_scals.F90 | 34 + openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 63 ++ openacc/impl/psb_s_oacc_mlt_v.f90 | 31 + openacc/impl/psb_s_oacc_mlt_v_2.f90 | 98 ++ openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 | 35 + openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 | 26 + openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_csr_csmm.F90 | 86 ++ openacc/impl/psb_z_oacc_csr_csmv.F90 | 81 ++ openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 | 83 ++ openacc/impl/psb_z_oacc_csr_mold.F90 | 35 + openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 | 25 + openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 | 28 + openacc/impl/psb_z_oacc_csr_scal.F90 | 53 + openacc/impl/psb_z_oacc_csr_scals.F90 | 34 + openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 63 ++ openacc/impl/psb_z_oacc_mlt_v.f90 | 31 + openacc/impl/psb_z_oacc_mlt_v_2.f90 | 98 ++ openacc/psb_c_oacc_csr_mat_mod.F90 | 343 +++++++ openacc/psb_c_oacc_vect_mod.F90 | 935 ++++++++++++++++++ openacc/psb_d_oacc_csr_mat_mod.F90 | 3 - openacc/psb_d_oacc_vect_mod.F90 | 54 +- openacc/psb_s_oacc_csr_mat_mod.F90 | 343 +++++++ openacc/psb_s_oacc_vect_mod.F90 | 935 ++++++++++++++++++ openacc/psb_z_oacc_csr_mat_mod.F90 | 343 +++++++ openacc/psb_z_oacc_vect_mod.F90 | 935 ++++++++++++++++++ 56 files changed, 6159 insertions(+), 120 deletions(-) create mode 100644 openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_csmm.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_csmv.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_mold.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_scal.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_scals.F90 create mode 100644 openacc/impl/psb_c_oacc_csr_vect_mv.F90 create mode 100644 openacc/impl/psb_c_oacc_mlt_v.f90 create mode 100644 openacc/impl/psb_c_oacc_mlt_v_2.f90 create mode 100644 openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_csmm.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_csmv.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_mold.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_scal.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_scals.F90 create mode 100644 openacc/impl/psb_s_oacc_csr_vect_mv.F90 create mode 100644 openacc/impl/psb_s_oacc_mlt_v.f90 create mode 100644 openacc/impl/psb_s_oacc_mlt_v_2.f90 create mode 100644 openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_csmm.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_csmv.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_mold.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_scal.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_scals.F90 create mode 100644 openacc/impl/psb_z_oacc_csr_vect_mv.F90 create mode 100644 openacc/impl/psb_z_oacc_mlt_v.f90 create mode 100644 openacc/impl/psb_z_oacc_mlt_v_2.f90 create mode 100644 openacc/psb_c_oacc_csr_mat_mod.F90 create mode 100644 openacc/psb_c_oacc_vect_mod.F90 create mode 100644 openacc/psb_s_oacc_csr_mat_mod.F90 create mode 100644 openacc/psb_s_oacc_vect_mod.F90 create mode 100644 openacc/psb_z_oacc_csr_mat_mod.F90 create mode 100644 openacc/psb_z_oacc_vect_mod.F90 diff --git a/openacc/Makefile b/openacc/Makefile index d1311fe2..3a752ac7 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -17,9 +17,12 @@ CINCLUDES= #LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis -FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ - psb_oacc_env_mod.o +FOBJS= psb_i_oacc_vect_mod.o \ + psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \ + psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ + psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ + psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ + psb_oacc_mod.o psb_oacc_env_mod.o LIBNAME=libpsb_openacc.a @@ -40,8 +43,23 @@ iobjs: $(OBJS) ilib: $(OBJS) $(MAKE) -C impl lib -psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_d_oacc_csr_mat_mod.o psb_oacc_env_mod.o +psb_oacc_mod.o : psb_i_oacc_vect_mod.o \ + psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \ + psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ + psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ + psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ + psb_oacc_env_mod.o + +psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ + psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o : psb_i_oacc_vect_mod.o + + +psb_s_oacc_csr_mat_mod.o: psb_s_oacc_vect_mod.o +psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o +psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o +psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o + + clean: cclean iclean /bin/rm -f $(FOBJS) *$(.mod) *.a diff --git a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..09cdc228 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,35 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%allocate(m, n, nz) + + if (.not.allocated(a%val)) then + allocate(a%val(nz)) + allocate(a%ja(nz)) + allocate(a%irp(m+1)) + end if + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_allocate_mnnz +end submodule psb_c_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..70380c95 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,26 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_cp_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_csr_cp_from_coo +end submodule psb_c_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..7e664791 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_cp_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + + end subroutine psb_c_oacc_csr_cp_from_fmt +end submodule psb_c_oacc_csr_cp_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_csr_csmm.F90 b/openacc/impl/psb_c_oacc_csr_csmm.F90 new file mode 100644 index 00000000..c26df410 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmm_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n,k, nxy + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalic_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = a%irp(i), a%irp(i+1) - 1 + y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_csmm +end submodule psb_c_oacc_csr_csmm_impl + diff --git a/openacc/impl/psb_c_oacc_csr_csmv.F90 b/openacc/impl/psb_c_oacc_csr_csmv.F90 new file mode 100644 index 00000000..8f37efb3 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_csmv.F90 @@ -0,0 +1,81 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalic_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, n + do j = a%irp(i), a%irp(i+1) - 1 + y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_csmv +end submodule psb_c_oacc_csr_csmv_impl + diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 new file mode 100644 index 00000000..2d733f48 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 @@ -0,0 +1,83 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_csr_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalic_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val) + yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i) + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'csrg_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_csr_inner_vect_sv +end submodule psb_c_oacc_csr_inner_vect_sv_impl + diff --git a/openacc/impl/psb_c_oacc_csr_mold.F90 b/openacc/impl/psb_c_oacc_csr_mold.F90 new file mode 100644 index 00000000..6ee36985 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_mold.F90 @@ -0,0 +1,35 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_mold(a, b, info) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='csr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_csr_mold +end submodule psb_c_oacc_csr_molc_impl + diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..f8c5c39d --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irp) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_csr_mv_from_coo +end submodule psb_c_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..7ba971b4 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + + end subroutine psb_c_oacc_csr_mv_from_fmt +end submodule psb_c_oacc_csr_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..92a53116 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_csr_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_reallocate_nz +end submodule psb_c_oacc_csr_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_csr_scal.F90 b/openacc/impl/psb_c_oacc_csr_scal.F90 new file mode 100644 index 00000000..5dece48b --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_scal.F90 @@ -0,0 +1,53 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_scal(d, a, info, side) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop present(a, d) + do i = 1, a%get_nrows() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop present(a, d) + do i = 1, a%get_ncols() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop present(a, d) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d(i) + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_scal +end submodule psb_c_oacc_csr_scal_impl diff --git a/openacc/impl/psb_c_oacc_csr_scals.F90 b/openacc/impl/psb_c_oacc_csr_scals.F90 new file mode 100644 index 00000000..aba22d43 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_scals.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_scals(d, a, info) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_csr_scals +end submodule psb_c_oacc_csr_scals_impl diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 new file mode 100644 index 00000000..b4b79d56 --- /dev/null +++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 @@ -0,0 +1,63 @@ +submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + + if ((n /= size(x%v)) .or. (n /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalic_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_c_oacc_csr_vect_mv +end submodule psb_c_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90 new file mode 100644 index 00000000..66c4e865 --- /dev/null +++ b/openacc/impl/psb_c_oacc_mlt_v.f90 @@ -0,0 +1,31 @@ + +subroutine c_oacc_mlt_v(x, y, info) + use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v + + implicit none + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_c_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +end subroutine c_oacc_mlt_v diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..a6bb6cc5 --- /dev/null +++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90 @@ -0,0 +1,98 @@ +subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v_2 + use psb_string_mod + implicit none + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= czero) .and. (z%is_host())) call z%sync() + if (conjgx_.and.conjgy_) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + + end if + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= czero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= czero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select +end subroutine c_oacc_mlt_v_2 + diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 index ab242b57..bedd0247 100644 --- a/openacc/impl/psb_d_oacc_mlt_v.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -1,34 +1,31 @@ -submodule (psb_d_oacc_vect_mod) psb_d_oacc_mlt_v_impl - use psb_string_mod -contains - module subroutine psb_d_oacc_mlt_v(x, y, info) +subroutine d_oacc_mlt_v(x, y, info) + use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v - implicit none - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n + integer(psb_ipk_) :: i, n - info = 0 - n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) - class is (psb_d_vect_oacc) - if (y%is_host()) call y%sync() - if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (y%is_dev()) call y%sync() - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - end select - end subroutine psb_d_oacc_mlt_v -end submodule psb_d_oacc_mlt_v_impl + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_d_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +end subroutine d_oacc_mlt_v diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index 4ca2bdab..e7dd604f 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -1,55 +1,98 @@ -submodule (psb_d_oacc_vect_mod) d_oacc_mlt_v_2_impl +subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2 use psb_string_mod -contains - module subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - character(len=1), intent(in), optional :: conjgx, conjgy - integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ - conjgx_ = .false. - conjgy_ = .false. - if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') - if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') - n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) - - info = 0 - select type(xx => x) + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) class is (psb_d_vect_oacc) - select type (yy => y) - class is (psb_d_vect_oacc) - if (xx%is_host()) call xx%sync() - if (yy%is_host()) call yy%sync() - if ((beta /= dzero) .and. (z%is_host())) call z%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero) .and. (z%is_host())) call z%sync() + if (conjgx_.and.conjgy_) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then !$acc parallel loop do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) end do - call z%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + else if ((.not.conjgx_).and.(conjgy_)) then + !$acc parallel loop do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) end do - call z%set_host() - end select + else + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + + end if + call z%set_dev() class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() if ((beta /= dzero) .and. (z%is_dev())) call z%sync() - do i = 1, n - z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) - end do + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + end if call z%set_host() end select - end subroutine d_oacc_mlt_v_2 -end submodule d_oacc_mlt_v_2_impl + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select +end subroutine d_oacc_mlt_v_2 diff --git a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..08c51bce --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,35 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%allocate(m, n, nz) + + if (.not.allocated(a%val)) then + allocate(a%val(nz)) + allocate(a%ja(nz)) + allocate(a%irp(m+1)) + end if + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_csr_allocate_mnnz +end submodule psb_s_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..94ef67b3 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,26 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_cp_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_csr_cp_from_coo +end submodule psb_s_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..2c64b5fe --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_cp_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + + end subroutine psb_s_oacc_csr_cp_from_fmt +end submodule psb_s_oacc_csr_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_csr_csmm.F90 b/openacc/impl/psb_s_oacc_csr_csmm.F90 new file mode 100644 index 00000000..2e7def53 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmm_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n,k, nxy + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalis_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = a%irp(i), a%irp(i+1) - 1 + y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_csr_csmm +end submodule psb_s_oacc_csr_csmm_impl + diff --git a/openacc/impl/psb_s_oacc_csr_csmv.F90 b/openacc/impl/psb_s_oacc_csr_csmv.F90 new file mode 100644 index 00000000..ba673941 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_csmv.F90 @@ -0,0 +1,81 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalis_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, n + do j = a%irp(i), a%irp(i+1) - 1 + y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_csr_csmv +end submodule psb_s_oacc_csr_csmv_impl + diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 new file mode 100644 index 00000000..7af897a7 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 @@ -0,0 +1,83 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_csr_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalis_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val) + yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i) + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'csrg_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_csr_inner_vect_sv +end submodule psb_s_oacc_csr_inner_vect_sv_impl + diff --git a/openacc/impl/psb_s_oacc_csr_mold.F90 b/openacc/impl/psb_s_oacc_csr_mold.F90 new file mode 100644 index 00000000..a85471e5 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_mold.F90 @@ -0,0 +1,35 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_mold(a, b, info) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='csr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_csr_mold +end submodule psb_s_oacc_csr_mols_impl + diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..e531d309 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irp) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_csr_mv_from_coo +end submodule psb_s_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..a9dc0c70 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + + end subroutine psb_s_oacc_csr_mv_from_fmt +end submodule psb_s_oacc_csr_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..77c17120 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_csr_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_csr_reallocate_nz +end submodule psb_s_oacc_csr_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_csr_scal.F90 b/openacc/impl/psb_s_oacc_csr_scal.F90 new file mode 100644 index 00000000..b9c8a986 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_scal.F90 @@ -0,0 +1,53 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_scal(d, a, info, side) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop present(a, d) + do i = 1, a%get_nrows() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop present(a, d) + do i = 1, a%get_ncols() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop present(a, d) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d(i) + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_csr_scal +end submodule psb_s_oacc_csr_scal_impl diff --git a/openacc/impl/psb_s_oacc_csr_scals.F90 b/openacc/impl/psb_s_oacc_csr_scals.F90 new file mode 100644 index 00000000..76ad7cf2 --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_scals.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_scals(d, a, info) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_csr_scals +end submodule psb_s_oacc_csr_scals_impl diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 new file mode 100644 index 00000000..9b15da3b --- /dev/null +++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 @@ -0,0 +1,63 @@ +submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + + if ((n /= size(x%v)) .or. (n /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalis_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_s_oacc_csr_vect_mv +end submodule psb_s_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90 new file mode 100644 index 00000000..fb043cf2 --- /dev/null +++ b/openacc/impl/psb_s_oacc_mlt_v.f90 @@ -0,0 +1,31 @@ + +subroutine s_oacc_mlt_v(x, y, info) + use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v + + implicit none + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_s_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +end subroutine s_oacc_mlt_v diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..04ee8e09 --- /dev/null +++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90 @@ -0,0 +1,98 @@ +subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v_2 + use psb_string_mod + implicit none + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= szero) .and. (z%is_host())) call z%sync() + if (conjgx_.and.conjgy_) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + + end if + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= szero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= szero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select +end subroutine s_oacc_mlt_v_2 + diff --git a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..fd19d6f9 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,35 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%allocate(m, n, nz) + + if (.not.allocated(a%val)) then + allocate(a%val(nz)) + allocate(a%ja(nz)) + allocate(a%irp(m+1)) + end if + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_allocate_mnnz +end submodule psb_z_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..0485c9ca --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,26 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_csr_cp_from_coo +end submodule psb_z_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..f2c68816 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_cp_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + + end subroutine psb_z_oacc_csr_cp_from_fmt +end submodule psb_z_oacc_csr_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_csr_csmm.F90 b/openacc/impl/psb_z_oacc_csr_csmm.F90 new file mode 100644 index 00000000..aeaaab33 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmm_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n,k, nxy + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invaliz_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = a%irp(i), a%irp(i+1) - 1 + y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_csmm +end submodule psb_z_oacc_csr_csmm_impl + diff --git a/openacc/impl/psb_z_oacc_csr_csmv.F90 b/openacc/impl/psb_z_oacc_csr_csmv.F90 new file mode 100644 index 00000000..f5501b21 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_csmv.F90 @@ -0,0 +1,81 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invaliz_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, n + do j = a%irp(i), a%irp(i+1) - 1 + y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_csmv +end submodule psb_z_oacc_csr_csmv_impl + diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 new file mode 100644 index 00000000..b5d552d3 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 @@ -0,0 +1,83 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_csr_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invaliz_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_oacc) + select type(yy => y) + type is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val) + yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i) + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'csrg_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_z_oacc_csr_inner_vect_sv +end submodule psb_z_oacc_csr_inner_vect_sv_impl + diff --git a/openacc/impl/psb_z_oacc_csr_mold.F90 b/openacc/impl/psb_z_oacc_csr_mold.F90 new file mode 100644 index 00000000..e7e9e8b9 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_mold.F90 @@ -0,0 +1,35 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mold_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_mold(a, b, info) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='csr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_csr_mold +end submodule psb_z_oacc_csr_molz_impl + diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..44b01b68 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irp) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_csr_mv_from_coo +end submodule psb_z_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..bf777e85 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + + end subroutine psb_z_oacc_csr_mv_from_fmt +end submodule psb_z_oacc_csr_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..bdfb88d6 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_csr_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_reallocate_nz +end submodule psb_z_oacc_csr_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_csr_scal.F90 b/openacc/impl/psb_z_oacc_csr_scal.F90 new file mode 100644 index 00000000..f09ff595 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_scal.F90 @@ -0,0 +1,53 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_scal(d, a, info, side) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop present(a, d) + do i = 1, a%get_nrows() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop present(a, d) + do i = 1, a%get_ncols() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop present(a, d) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d(i) + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_scal +end submodule psb_z_oacc_csr_scal_impl diff --git a/openacc/impl/psb_z_oacc_csr_scals.F90 b/openacc/impl/psb_z_oacc_csr_scals.F90 new file mode 100644 index 00000000..1fe64951 --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_scals.F90 @@ -0,0 +1,34 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_scals_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_scals(d, a, info) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_csr_scals +end submodule psb_z_oacc_csr_scals_impl diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 new file mode 100644 index 00000000..437dd70a --- /dev/null +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -0,0 +1,63 @@ +submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + + if ((n /= size(x%v)) .or. (n /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invaliz_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_csr_vect_mv +end submodule psb_z_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90 new file mode 100644 index 00000000..7018f009 --- /dev/null +++ b/openacc/impl/psb_z_oacc_mlt_v.f90 @@ -0,0 +1,31 @@ + +subroutine z_oacc_mlt_v(x, y, info) + use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v + + implicit none + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, n + + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_z_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select +end subroutine z_oacc_mlt_v diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90 new file mode 100644 index 00000000..dbc0929c --- /dev/null +++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90 @@ -0,0 +1,98 @@ +subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v_2 + use psb_string_mod + implicit none + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ + + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + info = 0 + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= zzero) .and. (z%is_host())) call z%sync() + if (conjgx_.and.conjgy_) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + + end if + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= zzero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + if ((beta /= zzero) .and. (z%is_dev())) call z%sync() + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if + call z%set_host() + end select +end subroutine z_oacc_mlt_v_2 + diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..00e79570 --- /dev/null +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -0,0 +1,343 @@ +module psb_c_oacc_csr_mat_mod + + use iso_c_binding + use psb_c_mat_mod + use psb_c_oacc_vect_mod + !use oaccsparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_csr_sparse_mat) :: psb_c_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => c_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv + procedure, pass(a) :: scals => psb_c_oacc_csr_scals + procedure, pass(a) :: scalv => psb_c_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => c_oacc_csr_free + procedure, pass(a) :: mold => psb_c_oacc_csr_mold + procedure, pass(a) :: all => c_oacc_csr_all + procedure, pass(a) :: is_host => c_oacc_csr_is_host + procedure, pass(a) :: is_sync => c_oacc_csr_is_sync + procedure, pass(a) :: is_dev => c_oacc_csr_is_dev + procedure, pass(a) :: set_host => c_oacc_csr_set_host + procedure, pass(a) :: set_sync => c_oacc_csr_set_sync + procedure, pass(a) :: set_dev => c_oacc_csr_set_dev + procedure, pass(a) :: sync_space => c_oacc_csr_sync_space + procedure, pass(a) :: sync => c_oacc_csr_sync + end type psb_c_oacc_csr_sparse_mat + + interface + module subroutine psb_c_oacc_csr_mold(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_mold + end interface + + interface + module subroutine psb_c_oacc_csr_cp_from_fmt(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_csr_mv_from_coo(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_csr_mv_from_fmt(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_csr_csmm + end interface + + interface + module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_csr_csmv + end interface + + interface + module subroutine psb_c_oacc_csr_scals(d, a, info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_scals + end interface + + interface + module subroutine psb_c_oacc_csr_scal(d,a,info,side) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_csr_scal + end interface + + interface + module subroutine psb_c_oacc_csr_reallocate_nz(nz,a) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_csr_cp_from_coo(a,b,info) + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_csr_cp_from_coo + end interface + +contains + + + subroutine c_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_c_csr_sparse_mat%free() + + return + end subroutine c_oacc_csr_free + + function c_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + (2*psb_sizeof_sp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function c_oacc_csr_sizeof + + + function c_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function c_oacc_csr_get_fmt + + subroutine c_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_c_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(a%val)) then + !$acc exit data delete(a%val) finalize + deallocate(a%val, stat=info) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) finalize + deallocate(a%ja, stat=info) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) finalize + deallocate(a%irp, stat=info) + end if + + call a%set_nrows(m) + call a%set_ncols(n) + + allocate(a%val(nz),stat=info) + allocate(a%ja(nz),stat=info) + allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_space() + end subroutine c_oacc_csr_all + + function c_oacc_csr_is_host(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_oacc_csr_is_host + + function c_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_oacc_csr_is_sync + + function c_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_oacc_csr_is_dev + + subroutine c_oacc_csr_set_host(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_oacc_csr_set_host + + subroutine c_oacc_csr_set_sync(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_oacc_csr_set_sync + + subroutine c_oacc_csr_set_dev(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_oacc_csr_set_dev + + subroutine c_oacc_csr_sync_space(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + if (allocated(a%val)) then + call c_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irp)) then + call i_oacc_create_dev(a%irp) + end if + contains + subroutine c_oacc_create_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine c_oacc_create_dev + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine c_oacc_csr_sync_space + + subroutine c_oacc_csr_sync(a) + implicit none + class(psb_c_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_c_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call c_oacc_csr_to_host(a%val) + call i_oacc_csr_to_host(a%ja) + call i_oacc_csr_to_host(a%irp) + else if (a%is_host()) then + call c_oacc_csr_to_dev(a%val) + call i_oacc_csr_to_dev(a%ja) + call i_oacc_csr_to_dev(a%irp) + end if + call tmpa%set_sync() + end subroutine c_oacc_csr_sync + + subroutine c_oacc_csr_to_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine c_oacc_csr_to_dev + + subroutine c_oacc_csr_to_host(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine c_oacc_csr_to_host + + subroutine i_oacc_csr_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_csr_to_dev + + subroutine i_oacc_csr_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_csr_to_host + +end module psb_c_oacc_csr_mat_mod + diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 new file mode 100644 index 00000000..6f9545ea --- /dev/null +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -0,0 +1,935 @@ +module psb_c_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_c_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_base_vect_type) :: psb_c_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => c_oacc_get_nrows + procedure, nopass :: get_fmt => c_oacc_get_fmt + + procedure, pass(x) :: all => c_oacc_vect_all + procedure, pass(x) :: zero => c_oacc_zero + procedure, pass(x) :: asb_m => c_oacc_asb_m + procedure, pass(x) :: sync => c_oacc_sync + procedure, pass(x) :: sync_space => c_oacc_sync_space + procedure, pass(x) :: bld_x => c_oacc_bld_x + procedure, pass(x) :: bld_mn => c_oacc_bld_mn + procedure, pass(x) :: free => c_oacc_vect_free + procedure, pass(x) :: ins_a => c_oacc_ins_a + procedure, pass(x) :: ins_v => c_oacc_ins_v + procedure, pass(x) :: is_host => c_oacc_is_host + procedure, pass(x) :: is_dev => c_oacc_is_dev + procedure, pass(x) :: is_sync => c_oacc_is_sync + procedure, pass(x) :: set_host => c_oacc_set_host + procedure, pass(x) :: set_dev => c_oacc_set_dev + procedure, pass(x) :: set_sync => c_oacc_set_sync + procedure, pass(x) :: set_scal => c_oacc_set_scal + + procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf + procedure, pass(y) :: sctb => c_oacc_sctb + procedure, pass(y) :: sctb_x => c_oacc_sctb_x + procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf + + procedure, pass(x) :: get_size => c_oacc_get_size + procedure, pass(x) :: dot_v => c_oacc_vect_dot + procedure, pass(x) :: dot_a => c_oacc_dot_a + procedure, pass(y) :: axpby_v => c_oacc_axpby_v + procedure, pass(y) :: axpby_a => c_oacc_axpby_a + procedure, pass(z) :: abgdxyz => c_oacc_abgdxyz + procedure, pass(y) :: mlt_a => c_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => c_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => c_oacc_mlt_v_2 + procedure, pass(x) :: scal => c_oacc_scal + procedure, pass(x) :: nrm2 => c_oacc_nrm2 + procedure, pass(x) :: amax => c_oacc_amax + procedure, pass(x) :: asum => c_oacc_asum + procedure, pass(x) :: absval1 => c_oacc_absval1 + procedure, pass(x) :: absval2 => c_oacc_absval2 + + end type psb_c_vect_oacc + + interface + subroutine c_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine c_oacc_mlt_v + end interface + + + interface + subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + import + implicit none + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine c_oacc_mlt_v_2 + end interface + +contains + + subroutine c_oacc_absval1(x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n, i + + if (x%is_host()) call x%sync_space() + n = size(x%v) + !$acc parallel loop + do i = 1, n + x%v(i) = abs(x%v(i)) + end do + call x%set_dev() + end subroutine c_oacc_absval1 + + subroutine c_oacc_absval2(x, y) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_c_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + !$acc parallel loop + do i = 1, n + yy%v(i) = abs(x%v(i)) + end do + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_c_base_vect_type%absval(y) + end select + end subroutine c_oacc_absval2 + + subroutine c_oacc_scal(alpha, x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + complex(psb_spk_), intent(in) :: alpha + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, size(x%v) + x%v(i) = alpha * x%v(i) + end do + call x%set_dev() + end subroutine c_oacc_scal + + function c_oacc_nrm2(n, x) result(res) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + real(psb_spk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i))**2 + end do + res = sqrt(sum) + end function c_oacc_nrm2 + + function c_oacc_amax(n, x) result(res) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + real(psb_spk_) :: max_val + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) + end do + res = max_val + end function c_oacc_amax + + function c_oacc_asum(n, x) result(res) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + complex(psb_spk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i)) + end do + res = sum + end function c_oacc_asum + + + subroutine c_oacc_mlt_a(x, y, info) + implicit none + complex(psb_spk_), intent(in) :: x(:) + class(psb_c_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync_space() + !$acc parallel loop + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine c_oacc_mlt_a + + subroutine c_oacc_mlt_a_2(alpha, x, y, beta, z, info) + implicit none + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(in) :: y(:) + class(psb_c_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync_space() + !$acc parallel loop + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine c_oacc_mlt_a_2 + + +!!$ subroutine c_oacc_mlt_v(x, y, info) +!!$ implicit none +!!$ class(psb_c_base_vect_type), intent(inout) :: x +!!$ class(psb_c_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_c_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +!!$ end subroutine c_oacc_mlt_v +!!$ +!!$ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_c_base_vect_type), intent(inout) :: x +!!$ class(psb_c_base_vect_type), intent(inout) :: y +!!$ class(psb_c_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ conjgx_ = .false. +!!$ conjgy_ = .false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') +!!$ +!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) +!!$ +!!$ info = 0 +!!$ select type(xx => x) +!!$ class is (psb_c_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_c_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ end subroutine c_oacc_mlt_v_2 + + + subroutine c_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_vect_oacc), intent(inout) :: y + complex(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_c_vect_oacc) + if ((beta /= czero) .and. y%is_host()) call y%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * xx%v(i) + beta * y%v(i) + end do + end if + call y%set_dev() + class default + if ((alpha /= czero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + end subroutine c_oacc_axpby_v + + subroutine c_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent(in) :: x(:) + class(psb_c_vect_oacc), intent(inout) :: y + complex(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= czero) .and. (y%is_dev())) call y%sync_space() + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine c_oacc_axpby_a + + subroutine c_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_c_base_vect_type), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + class(psb_c_vect_oacc), intent(inout) :: z + complex(psb_spk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. + + select type(xx => x) + class is (psb_c_vect_oacc) + select type(yy => y) + class is (psb_c_vect_oacc) + select type(zz => z) + class is (psb_c_vect_oacc) + if ((beta /= czero) .and. yy%is_host()) call yy%sync_space() + if ((delta /= czero) .and. zz%is_host()) call zz%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine c_oacc_abgdxyz + + subroutine c_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta + class(psb_c_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine c_oacc_sctb_buf + + subroutine c_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'c_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine c_oacc_sctb_x + + + + subroutine c_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_spk_) :: beta, x(:) + class(psb_c_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine c_oacc_sctb + + + subroutine c_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_c_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'c_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine c_oacc_gthzbuf + + subroutine c_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + complex(psb_spk_) :: y(:) + class(psb_c_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'c_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine c_oacc_gthzv_x + + subroutine c_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_c_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_c_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space(info) + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space(info) + end select + select type(vval => val) + type is (psb_c_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine c_oacc_ins_v + + + + subroutine c_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine c_oacc_ins_a + + + + subroutine c_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine c_oacc_bld_mn + + + subroutine c_oacc_bld_x(x, this) + use psb_base_mod + implicit none + complex(psb_spk_), intent(in) :: this(:) + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'c_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine c_oacc_bld_x + + + subroutine c_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_c_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_c_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine c_oacc_asb_m + + + + subroutine c_oacc_set_scal(x, val, first, last) + class(psb_c_vect_oacc), intent(inout) :: x + complex(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine c_oacc_set_scal + + + + subroutine c_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(czero) + end subroutine c_oacc_zero + + function c_oacc_get_nrows(x) result(res) + implicit none + class(psb_c_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function c_oacc_get_nrows + + function c_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "cOACC" + + end function c_oacc_get_fmt + + function c_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + class(psb_c_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + res = czero + + select type(yy => y) + type is (psb_c_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n, x%v, 1, yy%v, 1) + type is (psb_c_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + + !$acc parallel loop reduction(+:res) present(x%v, yy%v) + do i = 1, n + res = res + x%v(i) * yy%v(i) + end do + !$acc end parallel loop + + class default + call x%sync() + res = y%dot(n, x%v) + end select + + end function c_oacc_vect_dot + + + + + function c_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + complex(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + complex(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n, y, 1, x%v, 1) + + end function c_oacc_dot_a + + ! subroutine c_oacc_set_vect(x,y) + ! implicit none + ! class(psb_c_vect_oacc), intent(inout) :: x + ! complex(psb_spk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine c_oacc_set_vect + + subroutine c_oacc_to_dev(v) + implicit none + complex(psb_spk_) :: v(:) + !$acc update device(v) + end subroutine c_oacc_to_dev + + subroutine c_oacc_to_host(v) + implicit none + complex(psb_spk_) :: v(:) + !$acc update self(v) + end subroutine c_oacc_to_host + + subroutine c_oacc_sync_space(x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call c_oacc_create_dev(x%v) + end if + contains + subroutine c_oacc_create_dev(v) + implicit none + complex(psb_spk_) :: v(:) + !$acc enter data copyin(v) + end subroutine c_oacc_create_dev + end subroutine c_oacc_sync_space + + subroutine c_oacc_sync(x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call c_oacc_to_host(x%v) + end if + if (x%is_host()) then + call c_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine c_oacc_sync + + subroutine c_oacc_set_host(x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine c_oacc_set_host + + subroutine c_oacc_set_dev(x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine c_oacc_set_dev + + subroutine c_oacc_set_sync(x) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine c_oacc_set_sync + + function c_oacc_is_dev(x) result(res) + implicit none + class(psb_c_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function c_oacc_is_dev + + function c_oacc_is_host(x) result(res) + implicit none + class(psb_c_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function c_oacc_is_host + + function c_oacc_is_sync(x) result(res) + implicit none + class(psb_c_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function c_oacc_is_sync + + subroutine c_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_c_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'c_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine c_oacc_vect_all + + + subroutine c_oacc_vect_free(x, info) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine c_oacc_vect_free + + function c_oacc_get_size(x) result(res) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function c_oacc_get_size + +end module psb_c_oacc_vect_mod diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index ca4199a8..8b7e111e 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -175,8 +175,6 @@ contains return end subroutine d_oacc_csr_free - - function d_oacc_csr_sizeof(a) result(res) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a @@ -341,6 +339,5 @@ contains !$acc update self(v) end subroutine i_oacc_csr_to_host - end module psb_d_oacc_csr_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 3385f1ec..7d51766d 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -41,7 +41,7 @@ module psb_d_oacc_vect_mod procedure, pass(y) :: sctb_x => d_oacc_sctb_x procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf - procedure, pass(x) :: get_size => oacc_get_size + procedure, pass(x) :: get_size => d_oacc_get_size procedure, pass(x) :: dot_v => d_oacc_vect_dot procedure, pass(x) :: dot_a => d_oacc_dot_a procedure, pass(y) :: axpby_v => d_oacc_axpby_v @@ -49,8 +49,8 @@ module psb_d_oacc_vect_mod procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz procedure, pass(y) :: mlt_a => d_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 procedure, pass(x) :: scal => d_oacc_scal procedure, pass(x) :: nrm2 => d_oacc_nrm2 procedure, pass(x) :: amax => d_oacc_amax @@ -60,20 +60,20 @@ module psb_d_oacc_vect_mod end type psb_d_vect_oacc - real(psb_dpk_), allocatable :: v1(:),v2(:),p(:) - interface - module subroutine psb_d_oacc_mlt_v(x, y, info) + subroutine d_oacc_mlt_v(x, y, info) + import implicit none class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_mlt_v + end subroutine d_oacc_mlt_v end interface interface - module subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + import implicit none real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x @@ -81,7 +81,7 @@ module psb_d_oacc_vect_mod class(psb_d_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine psb_d_oacc_mlt_v_2 + end subroutine d_oacc_mlt_v_2 end interface contains @@ -151,7 +151,7 @@ contains sum = 0.0 !$acc parallel loop reduction(+:sum) do i = 1, n - sum = sum + x%v(i) * x%v(i) + sum = sum + abs(x%v(i))**2 end do res = sqrt(sum) end function d_oacc_nrm2 @@ -169,7 +169,7 @@ contains max_val = -huge(0.0) !$acc parallel loop reduction(max:max_val) do i = 1, n - if (x%v(i) > max_val) max_val = x%v(i) + if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) end do res = max_val end function d_oacc_amax @@ -923,41 +923,13 @@ contains end subroutine d_oacc_vect_free - function oacc_get_size(x) result(res) + function d_oacc_get_size(x) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: res if (x%is_dev()) call x%sync() res = size(x%v) - end function oacc_get_size - -!!$ -!!$ subroutine initialize(N) -!!$ integer(psb_ipk_) :: N -!!$ integer(psb_ipk_) :: i -!!$ allocate(v1(N),v2(N),p(N)) -!!$ !$acc enter data create(v1,v2,p) -!!$ !$acc parallel -!!$ !$acc loop -!!$ do i=1,n -!!$ v1(i) = i -!!$ v2(i) = n+i -!!$ end do -!!$ !$acc end parallel -!!$ end subroutine initialize -!!$ subroutine finalize_dev() -!!$ !$acc exit data delete(v1,v2,p) -!!$ end subroutine finalize_dev -!!$ subroutine finalize_host() -!!$ deallocate(v1,v2,p) -!!$ end subroutine finalize_host -!!$ subroutine to_dev() -!!$ !$acc update device(v1,v2) -!!$ end subroutine to_dev -!!$ subroutine to_host() -!!$ !$acc update self(v1,v2) -!!$ end subroutine to_host -!!$ + end function d_oacc_get_size end module psb_d_oacc_vect_mod diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..89b10d08 --- /dev/null +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -0,0 +1,343 @@ +module psb_s_oacc_csr_mat_mod + + use iso_c_binding + use psb_s_mat_mod + use psb_s_oacc_vect_mod + !use oaccsparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_csr_sparse_mat) :: psb_s_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => s_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv + procedure, pass(a) :: scals => psb_s_oacc_csr_scals + procedure, pass(a) :: scalv => psb_s_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => s_oacc_csr_free + procedure, pass(a) :: mold => psb_s_oacc_csr_mold + procedure, pass(a) :: all => s_oacc_csr_all + procedure, pass(a) :: is_host => s_oacc_csr_is_host + procedure, pass(a) :: is_sync => s_oacc_csr_is_sync + procedure, pass(a) :: is_dev => s_oacc_csr_is_dev + procedure, pass(a) :: set_host => s_oacc_csr_set_host + procedure, pass(a) :: set_sync => s_oacc_csr_set_sync + procedure, pass(a) :: set_dev => s_oacc_csr_set_dev + procedure, pass(a) :: sync_space => s_oacc_csr_sync_space + procedure, pass(a) :: sync => s_oacc_csr_sync + end type psb_s_oacc_csr_sparse_mat + + interface + module subroutine psb_s_oacc_csr_mold(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_mold + end interface + + interface + module subroutine psb_s_oacc_csr_cp_from_fmt(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_csr_mv_from_coo(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_csr_mv_from_fmt(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_csr_csmm + end interface + + interface + module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_csr_csmv + end interface + + interface + module subroutine psb_s_oacc_csr_scals(d, a, info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_scals + end interface + + interface + module subroutine psb_s_oacc_csr_scal(d,a,info,side) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_csr_scal + end interface + + interface + module subroutine psb_s_oacc_csr_reallocate_nz(nz,a) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_csr_cp_from_coo(a,b,info) + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_csr_cp_from_coo + end interface + +contains + + + subroutine s_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_s_csr_sparse_mat%free() + + return + end subroutine s_oacc_csr_free + + function s_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_sp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function s_oacc_csr_sizeof + + + function s_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function s_oacc_csr_get_fmt + + subroutine s_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_s_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(a%val)) then + !$acc exit data delete(a%val) finalize + deallocate(a%val, stat=info) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) finalize + deallocate(a%ja, stat=info) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) finalize + deallocate(a%irp, stat=info) + end if + + call a%set_nrows(m) + call a%set_ncols(n) + + allocate(a%val(nz),stat=info) + allocate(a%ja(nz),stat=info) + allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_space() + end subroutine s_oacc_csr_all + + function s_oacc_csr_is_host(a) result(res) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_csr_is_host + + function s_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_csr_is_sync + + function s_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_csr_is_dev + + subroutine s_oacc_csr_set_host(a) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_csr_set_host + + subroutine s_oacc_csr_set_sync(a) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_csr_set_sync + + subroutine s_oacc_csr_set_dev(a) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_csr_set_dev + + subroutine s_oacc_csr_sync_space(a) + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + if (allocated(a%val)) then + call s_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irp)) then + call i_oacc_create_dev(a%irp) + end if + contains + subroutine s_oacc_create_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine s_oacc_create_dev + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine s_oacc_csr_sync_space + + subroutine s_oacc_csr_sync(a) + implicit none + class(psb_s_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call s_oacc_csr_to_host(a%val) + call i_oacc_csr_to_host(a%ja) + call i_oacc_csr_to_host(a%irp) + else if (a%is_host()) then + call s_oacc_csr_to_dev(a%val) + call i_oacc_csr_to_dev(a%ja) + call i_oacc_csr_to_dev(a%irp) + end if + call tmpa%set_sync() + end subroutine s_oacc_csr_sync + + subroutine s_oacc_csr_to_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine s_oacc_csr_to_dev + + subroutine s_oacc_csr_to_host(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine s_oacc_csr_to_host + + subroutine i_oacc_csr_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_csr_to_dev + + subroutine i_oacc_csr_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_csr_to_host + +end module psb_s_oacc_csr_mat_mod + diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 new file mode 100644 index 00000000..36ae7da8 --- /dev/null +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -0,0 +1,935 @@ +module psb_s_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_s_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_base_vect_type) :: psb_s_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => s_oacc_get_nrows + procedure, nopass :: get_fmt => s_oacc_get_fmt + + procedure, pass(x) :: all => s_oacc_vect_all + procedure, pass(x) :: zero => s_oacc_zero + procedure, pass(x) :: asb_m => s_oacc_asb_m + procedure, pass(x) :: sync => s_oacc_sync + procedure, pass(x) :: sync_space => s_oacc_sync_space + procedure, pass(x) :: bld_x => s_oacc_bld_x + procedure, pass(x) :: bld_mn => s_oacc_bld_mn + procedure, pass(x) :: free => s_oacc_vect_free + procedure, pass(x) :: ins_a => s_oacc_ins_a + procedure, pass(x) :: ins_v => s_oacc_ins_v + procedure, pass(x) :: is_host => s_oacc_is_host + procedure, pass(x) :: is_dev => s_oacc_is_dev + procedure, pass(x) :: is_sync => s_oacc_is_sync + procedure, pass(x) :: set_host => s_oacc_set_host + procedure, pass(x) :: set_dev => s_oacc_set_dev + procedure, pass(x) :: set_sync => s_oacc_set_sync + procedure, pass(x) :: set_scal => s_oacc_set_scal + + procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf + procedure, pass(y) :: sctb => s_oacc_sctb + procedure, pass(y) :: sctb_x => s_oacc_sctb_x + procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf + + procedure, pass(x) :: get_size => s_oacc_get_size + procedure, pass(x) :: dot_v => s_oacc_vect_dot + procedure, pass(x) :: dot_a => s_oacc_dot_a + procedure, pass(y) :: axpby_v => s_oacc_axpby_v + procedure, pass(y) :: axpby_a => s_oacc_axpby_a + procedure, pass(z) :: abgdxyz => s_oacc_abgdxyz + procedure, pass(y) :: mlt_a => s_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => s_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => s_oacc_mlt_v_2 + procedure, pass(x) :: scal => s_oacc_scal + procedure, pass(x) :: nrm2 => s_oacc_nrm2 + procedure, pass(x) :: amax => s_oacc_amax + procedure, pass(x) :: asum => s_oacc_asum + procedure, pass(x) :: absval1 => s_oacc_absval1 + procedure, pass(x) :: absval2 => s_oacc_absval2 + + end type psb_s_vect_oacc + + interface + subroutine s_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine s_oacc_mlt_v + end interface + + + interface + subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + import + implicit none + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine s_oacc_mlt_v_2 + end interface + +contains + + subroutine s_oacc_absval1(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n, i + + if (x%is_host()) call x%sync_space() + n = size(x%v) + !$acc parallel loop + do i = 1, n + x%v(i) = abs(x%v(i)) + end do + call x%set_dev() + end subroutine s_oacc_absval1 + + subroutine s_oacc_absval2(x, y) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_s_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + !$acc parallel loop + do i = 1, n + yy%v(i) = abs(x%v(i)) + end do + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_s_base_vect_type%absval(y) + end select + end subroutine s_oacc_absval2 + + subroutine s_oacc_scal(alpha, x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + real(psb_spk_), intent(in) :: alpha + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, size(x%v) + x%v(i) = alpha * x%v(i) + end do + call x%set_dev() + end subroutine s_oacc_scal + + function s_oacc_nrm2(n, x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + real(psb_spk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i))**2 + end do + res = sqrt(sum) + end function s_oacc_nrm2 + + function s_oacc_amax(n, x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + real(psb_spk_) :: max_val + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) + end do + res = max_val + end function s_oacc_amax + + function s_oacc_asum(n, x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: info + real(psb_spk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i)) + end do + res = sum + end function s_oacc_asum + + + subroutine s_oacc_mlt_a(x, y, info) + implicit none + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync_space() + !$acc parallel loop + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine s_oacc_mlt_a + + subroutine s_oacc_mlt_a_2(alpha, x, y, beta, z, info) + implicit none + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + class(psb_s_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync_space() + !$acc parallel loop + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine s_oacc_mlt_a_2 + + +!!$ subroutine s_oacc_mlt_v(x, y, info) +!!$ implicit none +!!$ class(psb_s_base_vect_type), intent(inout) :: x +!!$ class(psb_s_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_s_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +!!$ end subroutine s_oacc_mlt_v +!!$ +!!$ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_spk_), intent(in) :: alpha, beta +!!$ class(psb_s_base_vect_type), intent(inout) :: x +!!$ class(psb_s_base_vect_type), intent(inout) :: y +!!$ class(psb_s_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ conjgx_ = .false. +!!$ conjgy_ = .false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') +!!$ +!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) +!!$ +!!$ info = 0 +!!$ select type(xx => x) +!!$ class is (psb_s_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_s_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ end subroutine s_oacc_mlt_v_2 + + + subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_vect_oacc), intent(inout) :: y + real(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_s_vect_oacc) + if ((beta /= szero) .and. y%is_host()) call y%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * xx%v(i) + beta * y%v(i) + end do + end if + call y%set_dev() + class default + if ((alpha /= szero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + end subroutine s_oacc_axpby_v + + subroutine s_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent(in) :: x(:) + class(psb_s_vect_oacc), intent(inout) :: y + real(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= szero) .and. (y%is_dev())) call y%sync_space() + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine s_oacc_axpby_a + + subroutine s_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_s_base_vect_type), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + class(psb_s_vect_oacc), intent(inout) :: z + real(psb_spk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. + + select type(xx => x) + class is (psb_s_vect_oacc) + select type(yy => y) + class is (psb_s_vect_oacc) + select type(zz => z) + class is (psb_s_vect_oacc) + if ((beta /= szero) .and. yy%is_host()) call yy%sync_space() + if ((delta /= szero) .and. zz%is_host()) call zz%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine s_oacc_abgdxyz + + subroutine s_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta + class(psb_s_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine s_oacc_sctb_buf + + subroutine s_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 's_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine s_oacc_sctb_x + + + + subroutine s_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + real(psb_spk_) :: beta, x(:) + class(psb_s_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine s_oacc_sctb + + + subroutine s_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_s_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 's_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine s_oacc_gthzbuf + + subroutine s_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + real(psb_spk_) :: y(:) + class(psb_s_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 's_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine s_oacc_gthzv_x + + subroutine s_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_s_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_s_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space(info) + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space(info) + end select + select type(vval => val) + type is (psb_s_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine s_oacc_ins_v + + + + subroutine s_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + real(psb_spk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine s_oacc_ins_a + + + + subroutine s_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine s_oacc_bld_mn + + + subroutine s_oacc_bld_x(x, this) + use psb_base_mod + implicit none + real(psb_spk_), intent(in) :: this(:) + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 's_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine s_oacc_bld_x + + + subroutine s_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_s_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_s_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine s_oacc_asb_m + + + + subroutine s_oacc_set_scal(x, val, first, last) + class(psb_s_vect_oacc), intent(inout) :: x + real(psb_spk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine s_oacc_set_scal + + + + subroutine s_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(szero) + end subroutine s_oacc_zero + + function s_oacc_get_nrows(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function s_oacc_get_nrows + + function s_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "sOACC" + + end function s_oacc_get_fmt + + function s_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + class(psb_s_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + res = szero + + select type(yy => y) + type is (psb_s_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n, x%v, 1, yy%v, 1) + type is (psb_s_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + + !$acc parallel loop reduction(+:res) present(x%v, yy%v) + do i = 1, n + res = res + x%v(i) * yy%v(i) + end do + !$acc end parallel loop + + class default + call x%sync() + res = y%dot(n, x%v) + end select + + end function s_oacc_vect_dot + + + + + function s_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + real(psb_spk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n, y, 1, x%v, 1) + + end function s_oacc_dot_a + + ! subroutine s_oacc_set_vect(x,y) + ! implicit none + ! class(psb_s_vect_oacc), intent(inout) :: x + ! real(psb_spk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine s_oacc_set_vect + + subroutine s_oacc_to_dev(v) + implicit none + real(psb_spk_) :: v(:) + !$acc update device(v) + end subroutine s_oacc_to_dev + + subroutine s_oacc_to_host(v) + implicit none + real(psb_spk_) :: v(:) + !$acc update self(v) + end subroutine s_oacc_to_host + + subroutine s_oacc_sync_space(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call s_oacc_create_dev(x%v) + end if + contains + subroutine s_oacc_create_dev(v) + implicit none + real(psb_spk_) :: v(:) + !$acc enter data copyin(v) + end subroutine s_oacc_create_dev + end subroutine s_oacc_sync_space + + subroutine s_oacc_sync(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call s_oacc_to_host(x%v) + end if + if (x%is_host()) then + call s_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine s_oacc_sync + + subroutine s_oacc_set_host(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine s_oacc_set_host + + subroutine s_oacc_set_dev(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine s_oacc_set_dev + + subroutine s_oacc_set_sync(x) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine s_oacc_set_sync + + function s_oacc_is_dev(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function s_oacc_is_dev + + function s_oacc_is_host(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function s_oacc_is_host + + function s_oacc_is_sync(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function s_oacc_is_sync + + subroutine s_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_s_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 's_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine s_oacc_vect_all + + + subroutine s_oacc_vect_free(x, info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine s_oacc_vect_free + + function s_oacc_get_size(x) result(res) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function s_oacc_get_size + +end module psb_s_oacc_vect_mod diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 new file mode 100644 index 00000000..7842d96c --- /dev/null +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -0,0 +1,343 @@ +module psb_z_oacc_csr_mat_mod + + use iso_c_binding + use psb_z_mat_mod + use psb_z_oacc_vect_mod + !use oaccsparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_csr_sparse_mat) :: psb_z_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => z_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv + procedure, pass(a) :: scals => psb_z_oacc_csr_scals + procedure, pass(a) :: scalv => psb_z_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => z_oacc_csr_free + procedure, pass(a) :: mold => psb_z_oacc_csr_mold + procedure, pass(a) :: all => z_oacc_csr_all + procedure, pass(a) :: is_host => z_oacc_csr_is_host + procedure, pass(a) :: is_sync => z_oacc_csr_is_sync + procedure, pass(a) :: is_dev => z_oacc_csr_is_dev + procedure, pass(a) :: set_host => z_oacc_csr_set_host + procedure, pass(a) :: set_sync => z_oacc_csr_set_sync + procedure, pass(a) :: set_dev => z_oacc_csr_set_dev + procedure, pass(a) :: sync_space => z_oacc_csr_sync_space + procedure, pass(a) :: sync => z_oacc_csr_sync + end type psb_z_oacc_csr_sparse_mat + + interface + module subroutine psb_z_oacc_csr_mold(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_mold + end interface + + interface + module subroutine psb_z_oacc_csr_cp_from_fmt(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_csr_mv_from_coo(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_csr_mv_from_fmt(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_z_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_csr_csmm + end interface + + interface + module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_csr_csmv + end interface + + interface + module subroutine psb_z_oacc_csr_scals(d, a, info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_scals + end interface + + interface + module subroutine psb_z_oacc_csr_scal(d,a,info,side) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_csr_scal + end interface + + interface + module subroutine psb_z_oacc_csr_reallocate_nz(nz,a) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_csr_cp_from_coo(a,b,info) + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_csr_cp_from_coo + end interface + +contains + + + subroutine z_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_z_csr_sparse_mat%free() + + return + end subroutine z_oacc_csr_free + + function z_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + (2*psb_sizeof_dp) * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function z_oacc_csr_sizeof + + + function z_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function z_oacc_csr_get_fmt + + subroutine z_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_z_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(a%val)) then + !$acc exit data delete(a%val) finalize + deallocate(a%val, stat=info) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) finalize + deallocate(a%ja, stat=info) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) finalize + deallocate(a%irp, stat=info) + end if + + call a%set_nrows(m) + call a%set_ncols(n) + + allocate(a%val(nz),stat=info) + allocate(a%ja(nz),stat=info) + allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_space() + end subroutine z_oacc_csr_all + + function z_oacc_csr_is_host(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_csr_is_host + + function z_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_csr_is_sync + + function z_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_csr_is_dev + + subroutine z_oacc_csr_set_host(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_csr_set_host + + subroutine z_oacc_csr_set_sync(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_csr_set_sync + + subroutine z_oacc_csr_set_dev(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_csr_set_dev + + subroutine z_oacc_csr_sync_space(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + if (allocated(a%val)) then + call z_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irp)) then + call i_oacc_create_dev(a%irp) + end if + contains + subroutine z_oacc_create_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine z_oacc_create_dev + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine z_oacc_csr_sync_space + + subroutine z_oacc_csr_sync(a) + implicit none + class(psb_z_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call z_oacc_csr_to_host(a%val) + call i_oacc_csr_to_host(a%ja) + call i_oacc_csr_to_host(a%irp) + else if (a%is_host()) then + call z_oacc_csr_to_dev(a%val) + call i_oacc_csr_to_dev(a%ja) + call i_oacc_csr_to_dev(a%irp) + end if + call tmpa%set_sync() + end subroutine z_oacc_csr_sync + + subroutine z_oacc_csr_to_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine z_oacc_csr_to_dev + + subroutine z_oacc_csr_to_host(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine z_oacc_csr_to_host + + subroutine i_oacc_csr_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_csr_to_dev + + subroutine i_oacc_csr_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_csr_to_host + +end module psb_z_oacc_csr_mat_mod + diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 new file mode 100644 index 00000000..5d03b49d --- /dev/null +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -0,0 +1,935 @@ +module psb_z_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_z_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_base_vect_type) :: psb_z_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => z_oacc_get_nrows + procedure, nopass :: get_fmt => z_oacc_get_fmt + + procedure, pass(x) :: all => z_oacc_vect_all + procedure, pass(x) :: zero => z_oacc_zero + procedure, pass(x) :: asb_m => z_oacc_asb_m + procedure, pass(x) :: sync => z_oacc_sync + procedure, pass(x) :: sync_space => z_oacc_sync_space + procedure, pass(x) :: bld_x => z_oacc_bld_x + procedure, pass(x) :: bld_mn => z_oacc_bld_mn + procedure, pass(x) :: free => z_oacc_vect_free + procedure, pass(x) :: ins_a => z_oacc_ins_a + procedure, pass(x) :: ins_v => z_oacc_ins_v + procedure, pass(x) :: is_host => z_oacc_is_host + procedure, pass(x) :: is_dev => z_oacc_is_dev + procedure, pass(x) :: is_sync => z_oacc_is_sync + procedure, pass(x) :: set_host => z_oacc_set_host + procedure, pass(x) :: set_dev => z_oacc_set_dev + procedure, pass(x) :: set_sync => z_oacc_set_sync + procedure, pass(x) :: set_scal => z_oacc_set_scal + + procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf + procedure, pass(y) :: sctb => z_oacc_sctb + procedure, pass(y) :: sctb_x => z_oacc_sctb_x + procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf + + procedure, pass(x) :: get_size => z_oacc_get_size + procedure, pass(x) :: dot_v => z_oacc_vect_dot + procedure, pass(x) :: dot_a => z_oacc_dot_a + procedure, pass(y) :: axpby_v => z_oacc_axpby_v + procedure, pass(y) :: axpby_a => z_oacc_axpby_a + procedure, pass(z) :: abgdxyz => z_oacc_abgdxyz + procedure, pass(y) :: mlt_a => z_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => z_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => z_oacc_mlt_v_2 + procedure, pass(x) :: scal => z_oacc_scal + procedure, pass(x) :: nrm2 => z_oacc_nrm2 + procedure, pass(x) :: amax => z_oacc_amax + procedure, pass(x) :: asum => z_oacc_asum + procedure, pass(x) :: absval1 => z_oacc_absval1 + procedure, pass(x) :: absval2 => z_oacc_absval2 + + end type psb_z_vect_oacc + + interface + subroutine z_oacc_mlt_v(x, y, info) + import + implicit none + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + end subroutine z_oacc_mlt_v + end interface + + + interface + subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + import + implicit none + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + end subroutine z_oacc_mlt_v_2 + end interface + +contains + + subroutine z_oacc_absval1(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: n, i + + if (x%is_host()) call x%sync_space() + n = size(x%v) + !$acc parallel loop + do i = 1, n + x%v(i) = abs(x%v(i)) + end do + call x%set_dev() + end subroutine z_oacc_absval1 + + subroutine z_oacc_absval2(x, y) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + + n = min(size(x%v), size(y%v)) + select type (yy => y) + class is (psb_z_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + !$acc parallel loop + do i = 1, n + yy%v(i) = abs(x%v(i)) + end do + class default + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call x%psb_z_base_vect_type%absval(y) + end select + end subroutine z_oacc_absval2 + + subroutine z_oacc_scal(alpha, x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + complex(psb_dpk_), intent(in) :: alpha + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, size(x%v) + x%v(i) = alpha * x%v(i) + end do + call x%set_dev() + end subroutine z_oacc_scal + + function z_oacc_nrm2(n, x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i))**2 + end do + res = sqrt(sum) + end function z_oacc_nrm2 + + function z_oacc_amax(n, x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) + end do + res = max_val + end function z_oacc_amax + + function z_oacc_asum(n, x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: info + complex(psb_dpk_) :: sum + integer(psb_ipk_) :: i + + if (x%is_host()) call x%sync_space() + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x%v(i)) + end do + res = sum + end function z_oacc_asum + + + subroutine z_oacc_mlt_a(x, y, info) + implicit none + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (y%is_dev()) call y%sync_space() + !$acc parallel loop + do i = 1, size(x) + y%v(i) = y%v(i) * x(i) + end do + call y%set_host() + end subroutine z_oacc_mlt_a + + subroutine z_oacc_mlt_a_2(alpha, x, y, beta, z, info) + implicit none + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + class(psb_z_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, n + + info = 0 + if (z%is_dev()) call z%sync_space() + !$acc parallel loop + do i = 1, size(x) + z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) + end do + call z%set_host() + end subroutine z_oacc_mlt_a_2 + + +!!$ subroutine z_oacc_mlt_v(x, y, info) +!!$ implicit none +!!$ class(psb_z_base_vect_type), intent(inout) :: x +!!$ class(psb_z_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_z_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +!!$ end subroutine z_oacc_mlt_v +!!$ +!!$ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ complex(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_z_base_vect_type), intent(inout) :: x +!!$ class(psb_z_base_vect_type), intent(inout) :: y +!!$ class(psb_z_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ conjgx_ = .false. +!!$ conjgy_ = .false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') +!!$ +!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) +!!$ +!!$ info = 0 +!!$ select type(xx => x) +!!$ class is (psb_z_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_z_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ end subroutine z_oacc_mlt_v_2 + + + subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_vect_oacc), intent(inout) :: y + complex(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, i + + info = psb_success_ + + select type(xx => x) + type is (psb_z_vect_oacc) + if ((beta /= zzero) .and. y%is_host()) call y%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(y%v) + if ((nx < m) .or. (ny < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * xx%v(i) + beta * y%v(i) + end do + end if + call y%set_dev() + class default + if ((alpha /= zzero) .and. (x%is_dev())) call x%sync() + call y%axpby(m, alpha, x%v, beta, info) + end select + end subroutine z_oacc_axpby_v + + subroutine z_oacc_axpby_a(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent(in) :: x(:) + class(psb_z_vect_oacc), intent(inout) :: y + complex(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i + + if ((beta /= zzero) .and. (y%is_dev())) call y%sync_space() + !$acc parallel loop + do i = 1, m + y%v(i) = alpha * x(i) + beta * y%v(i) + end do + call y%set_host() + end subroutine z_oacc_axpby_a + + subroutine z_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + class(psb_z_base_vect_type), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + class(psb_z_vect_oacc), intent(inout) :: z + complex(psb_dpk_), intent(in) :: alpha, beta, gamma, delta + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: nx, ny, nz, i + logical :: gpu_done + + info = psb_success_ + gpu_done = .false. + + select type(xx => x) + class is (psb_z_vect_oacc) + select type(yy => y) + class is (psb_z_vect_oacc) + select type(zz => z) + class is (psb_z_vect_oacc) + if ((beta /= zzero) .and. yy%is_host()) call yy%sync_space() + if ((delta /= zzero) .and. zz%is_host()) call zz%sync_space() + if (xx%is_host()) call xx%sync_space() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. + end select + end select + end select + + if (.not. gpu_done) then + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) + end if + end subroutine z_oacc_abgdxyz + + subroutine z_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta + class(psb_z_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine z_oacc_sctb_buf + + subroutine z_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'z_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine z_oacc_sctb_x + + + + subroutine z_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + complex(psb_dpk_) :: beta, x(:) + class(psb_z_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine z_oacc_sctb + + + subroutine z_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_z_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'z_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine z_oacc_gthzbuf + + subroutine z_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + complex(psb_dpk_) :: y(:) + class(psb_z_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space(info) + class default + call psb_errpush(info, 'z_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine z_oacc_gthzv_x + + subroutine z_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_z_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_z_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space(info) + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space(info) + end select + select type(vval => val) + type is (psb_z_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine z_oacc_ins_v + + + + subroutine z_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + complex(psb_dpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine z_oacc_ins_a + + + + subroutine z_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine z_oacc_bld_mn + + + subroutine z_oacc_bld_x(x, this) + use psb_base_mod + implicit none + complex(psb_dpk_), intent(in) :: this(:) + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'z_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine z_oacc_bld_x + + + subroutine z_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_z_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_z_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine z_oacc_asb_m + + + + subroutine z_oacc_set_scal(x, val, first, last) + class(psb_z_vect_oacc), intent(inout) :: x + complex(psb_dpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine z_oacc_set_scal + + + + subroutine z_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(zzero) + end subroutine z_oacc_zero + + function z_oacc_get_nrows(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function z_oacc_get_nrows + + function z_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "zOACC" + + end function z_oacc_get_fmt + + function z_oacc_vect_dot(n, x, y) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + class(psb_z_base_vect_type), intent(inout) :: y + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + res = zzero + + select type(yy => y) + type is (psb_z_base_vect_type) + if (x%is_dev()) call x%sync() + res = ddot(n, x%v, 1, yy%v, 1) + type is (psb_z_vect_oacc) + if (x%is_host()) call x%sync() + if (yy%is_host()) call yy%sync() + + !$acc parallel loop reduction(+:res) present(x%v, yy%v) + do i = 1, n + res = res + x%v(i) * yy%v(i) + end do + !$acc end parallel loop + + class default + call x%sync() + res = y%dot(n, x%v) + end select + + end function z_oacc_vect_dot + + + + + function z_oacc_dot_a(n, x, y) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + complex(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + complex(psb_dpk_), external :: ddot + + if (x%is_dev()) call x%sync() + res = ddot(n, y, 1, x%v, 1) + + end function z_oacc_dot_a + + ! subroutine z_oacc_set_vect(x,y) + ! implicit none + ! class(psb_z_vect_oacc), intent(inout) :: x + ! complex(psb_dpk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine z_oacc_set_vect + + subroutine z_oacc_to_dev(v) + implicit none + complex(psb_dpk_) :: v(:) + !$acc update device(v) + end subroutine z_oacc_to_dev + + subroutine z_oacc_to_host(v) + implicit none + complex(psb_dpk_) :: v(:) + !$acc update self(v) + end subroutine z_oacc_to_host + + subroutine z_oacc_sync_space(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call z_oacc_create_dev(x%v) + end if + contains + subroutine z_oacc_create_dev(v) + implicit none + complex(psb_dpk_) :: v(:) + !$acc enter data copyin(v) + end subroutine z_oacc_create_dev + end subroutine z_oacc_sync_space + + subroutine z_oacc_sync(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call z_oacc_to_host(x%v) + end if + if (x%is_host()) then + call z_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine z_oacc_sync + + subroutine z_oacc_set_host(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine z_oacc_set_host + + subroutine z_oacc_set_dev(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine z_oacc_set_dev + + subroutine z_oacc_set_sync(x) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine z_oacc_set_sync + + function z_oacc_is_dev(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function z_oacc_is_dev + + function z_oacc_is_host(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function z_oacc_is_host + + function z_oacc_is_sync(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function z_oacc_is_sync + + subroutine z_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_z_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'z_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine z_oacc_vect_all + + + subroutine z_oacc_vect_free(x, info) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine z_oacc_vect_free + + function z_oacc_get_size(x) result(res) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function z_oacc_get_size + +end module psb_z_oacc_vect_mod From 686bac42249fa58fc56c42dccc6b2b1b0e5a069c Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Jul 2024 13:03:46 +0200 Subject: [PATCH 14/39] Account for S/D/C/Z variants --- openacc/impl/Makefile | 68 +++++++++++++++++++++++++++++++++++----- openacc/psb_oacc_mod.F90 | 12 +++++-- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 56df9402..32f104dd 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -10,13 +10,67 @@ UP=.. FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. LIBNAME=libpsb_openacc.a -OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ - psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ - psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ - psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ - psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \ - psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o \ - psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o +OBJS= \ +psb_s_oacc_csr_vect_mv.o \ +psb_s_oacc_csr_inner_vect_sv.o \ +psb_s_oacc_csr_csmm.o \ +psb_s_oacc_csr_csmv.o \ +psb_s_oacc_csr_scals.o \ +psb_s_oacc_csr_scal.o \ +psb_s_oacc_csr_allocate_mnnz.o \ +psb_s_oacc_csr_reallocate_nz.o \ +psb_s_oacc_csr_cp_from_coo.o \ +psb_s_oacc_csr_cp_from_fmt.o \ +psb_s_oacc_csr_mv_from_coo.o \ +psb_s_oacc_csr_mv_from_fmt.o \ +psb_s_oacc_csr_mold.o \ +psb_s_oacc_mlt_v_2.o \ +psb_s_oacc_mlt_v.o \ +psb_d_oacc_csr_vect_mv.o \ +psb_d_oacc_csr_inner_vect_sv.o \ +psb_d_oacc_csr_csmm.o \ +psb_d_oacc_csr_csmv.o \ +psb_d_oacc_csr_scals.o \ +psb_d_oacc_csr_scal.o \ +psb_d_oacc_csr_allocate_mnnz.o \ +psb_d_oacc_csr_reallocate_nz.o \ +psb_d_oacc_csr_cp_from_coo.o \ +psb_d_oacc_csr_cp_from_fmt.o \ +psb_d_oacc_csr_mv_from_coo.o \ +psb_d_oacc_csr_mv_from_fmt.o \ +psb_d_oacc_csr_mold.o \ +psb_d_oacc_mlt_v_2.o \ +psb_d_oacc_mlt_v.o \ +psb_c_oacc_csr_vect_mv.o \ +psb_c_oacc_csr_inner_vect_sv.o \ +psb_c_oacc_csr_csmm.o \ +psb_c_oacc_csr_csmv.o \ +psb_c_oacc_csr_scals.o \ +psb_c_oacc_csr_scal.o \ +psb_c_oacc_csr_allocate_mnnz.o \ +psb_c_oacc_csr_reallocate_nz.o \ +psb_c_oacc_csr_cp_from_coo.o \ +psb_c_oacc_csr_cp_from_fmt.o \ +psb_c_oacc_csr_mv_from_coo.o \ +psb_c_oacc_csr_mv_from_fmt.o \ +psb_c_oacc_csr_mold.o \ +psb_c_oacc_mlt_v_2.o \ +psb_c_oacc_mlt_v.o \ +psb_z_oacc_csr_vect_mv.o \ +psb_z_oacc_csr_inner_vect_sv.o \ +psb_z_oacc_csr_csmm.o \ +psb_z_oacc_csr_csmv.o \ +psb_z_oacc_csr_scals.o \ +psb_z_oacc_csr_scal.o \ +psb_z_oacc_csr_allocate_mnnz.o \ +psb_z_oacc_csr_reallocate_nz.o \ +psb_z_oacc_csr_cp_from_coo.o \ +psb_z_oacc_csr_cp_from_fmt.o \ +psb_z_oacc_csr_mv_from_coo.o \ +psb_z_oacc_csr_mv_from_fmt.o \ +psb_z_oacc_csr_mold.o \ +psb_z_oacc_mlt_v_2.o \ +psb_z_oacc_mlt_v.o objs: $(OBJS) diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90 index fe827db8..2d8e8b40 100644 --- a/openacc/psb_oacc_mod.F90 +++ b/openacc/psb_oacc_mod.F90 @@ -3,7 +3,15 @@ module psb_oacc_mod use psb_oacc_env_mod + use psb_i_oacc_vect_mod + use psb_s_oacc_vect_mod use psb_d_oacc_vect_mod - use psb_d_oacc_csr_mat_mod + use psb_c_oacc_vect_mod + use psb_z_oacc_vect_mod -end module psb_oacc_mod \ No newline at end of file + use psb_s_oacc_csr_mat_mod + use psb_d_oacc_csr_mat_mod + use psb_c_oacc_csr_mat_mod + use psb_z_oacc_csr_mat_mod + +end module psb_oacc_mod From 9e18545151c8a2d4f944ffbd01bd4848c6b36c2e Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 17 Jul 2024 13:04:07 +0200 Subject: [PATCH 15/39] Fix typos --- openacc/impl/psb_c_oacc_csr_csmm.F90 | 2 +- openacc/impl/psb_c_oacc_csr_csmv.F90 | 2 +- openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 | 2 +- openacc/impl/psb_c_oacc_csr_mold.F90 | 2 +- openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 2 +- openacc/impl/psb_s_oacc_csr_csmm.F90 | 2 +- openacc/impl/psb_s_oacc_csr_csmv.F90 | 2 +- openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 | 2 +- openacc/impl/psb_s_oacc_csr_mold.F90 | 2 +- openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 2 +- openacc/impl/psb_z_oacc_csr_csmm.F90 | 2 +- openacc/impl/psb_z_oacc_csr_csmv.F90 | 2 +- openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 | 2 +- openacc/impl/psb_z_oacc_csr_mold.F90 | 2 +- openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 2 +- 15 files changed, 15 insertions(+), 15 deletions(-) diff --git a/openacc/impl/psb_c_oacc_csr_csmm.F90 b/openacc/impl/psb_c_oacc_csr_csmm.F90 index c26df410..5bf0bad4 100644 --- a/openacc/impl/psb_c_oacc_csr_csmm.F90 +++ b/openacc/impl/psb_c_oacc_csr_csmm.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invalic_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_c_oacc_csr_csmv.F90 b/openacc/impl/psb_c_oacc_csr_csmv.F90 index 8f37efb3..e5d5f24e 100644 --- a/openacc/impl/psb_c_oacc_csr_csmv.F90 +++ b/openacc/impl/psb_c_oacc_csr_csmv.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invalic_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 index 2d733f48..1dee9f2e 100644 --- a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invalic_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_c_oacc_csr_mold.F90 b/openacc/impl/psb_c_oacc_csr_mold.F90 index 6ee36985..cf32ea94 100644 --- a/openacc/impl/psb_c_oacc_csr_mold.F90 +++ b/openacc/impl/psb_c_oacc_csr_mold.F90 @@ -31,5 +31,5 @@ contains return end subroutine psb_c_oacc_csr_mold -end submodule psb_c_oacc_csr_molc_impl +end submodule psb_c_oacc_csr_mold_impl diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 index b4b79d56..0fd1ed35 100644 --- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains if ((n /= size(x%v)) .or. (n /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) - info = psb_err_invalic_mat_state_ + info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_s_oacc_csr_csmm.F90 b/openacc/impl/psb_s_oacc_csr_csmm.F90 index 2e7def53..bb8283bf 100644 --- a/openacc/impl/psb_s_oacc_csr_csmm.F90 +++ b/openacc/impl/psb_s_oacc_csr_csmm.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invalis_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_s_oacc_csr_csmv.F90 b/openacc/impl/psb_s_oacc_csr_csmv.F90 index ba673941..c224dc0e 100644 --- a/openacc/impl/psb_s_oacc_csr_csmv.F90 +++ b/openacc/impl/psb_s_oacc_csr_csmv.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invalis_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 index 7af897a7..ec7f4bad 100644 --- a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invalis_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_s_oacc_csr_mold.F90 b/openacc/impl/psb_s_oacc_csr_mold.F90 index a85471e5..95bddde8 100644 --- a/openacc/impl/psb_s_oacc_csr_mold.F90 +++ b/openacc/impl/psb_s_oacc_csr_mold.F90 @@ -31,5 +31,5 @@ contains return end subroutine psb_s_oacc_csr_mold -end submodule psb_s_oacc_csr_mols_impl +end submodule psb_s_oacc_csr_mold_impl diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 index 9b15da3b..c2bbd6b1 100644 --- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains if ((n /= size(x%v)) .or. (n /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) - info = psb_err_invalis_mat_state_ + info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_z_oacc_csr_csmm.F90 b/openacc/impl/psb_z_oacc_csr_csmm.F90 index aeaaab33..97a38deb 100644 --- a/openacc/impl/psb_z_oacc_csr_csmm.F90 +++ b/openacc/impl/psb_z_oacc_csr_csmm.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invaliz_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_z_oacc_csr_csmv.F90 b/openacc/impl/psb_z_oacc_csr_csmv.F90 index f5501b21..8def3c76 100644 --- a/openacc/impl/psb_z_oacc_csr_csmv.F90 +++ b/openacc/impl/psb_z_oacc_csr_csmv.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invaliz_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 index b5d552d3..4975b276 100644 --- a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 @@ -27,7 +27,7 @@ contains end if if (.not.a%is_asb()) then - info = psb_err_invaliz_mat_state_ + info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif diff --git a/openacc/impl/psb_z_oacc_csr_mold.F90 b/openacc/impl/psb_z_oacc_csr_mold.F90 index e7e9e8b9..93b6bb17 100644 --- a/openacc/impl/psb_z_oacc_csr_mold.F90 +++ b/openacc/impl/psb_z_oacc_csr_mold.F90 @@ -31,5 +31,5 @@ contains return end subroutine psb_z_oacc_csr_mold -end submodule psb_z_oacc_csr_molz_impl +end submodule psb_z_oacc_csr_mold_impl diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 index 437dd70a..b8da5c8f 100644 --- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains if ((n /= size(x%v)) .or. (n /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) - info = psb_err_invaliz_mat_state_ + info = psb_err_invalid_mat_state_ return end if From b5a8c549dd7f78775c12db811aae243b6658167a Mon Sep 17 00:00:00 2001 From: tloloum Date: Fri, 19 Jul 2024 11:35:11 +0200 Subject: [PATCH 16/39] psb_d_oacc_pde3d draft --- test/openacc/Makefile | 4 +- test/openacc/test.F90 | 617 ----------------- test/pargen/Makefile | 8 +- test/pargen/psb_d_oacc_pde3d.F90 | 1073 ++++++++++++++++++++++++++++++ 4 files changed, 1081 insertions(+), 621 deletions(-) delete mode 100644 test/openacc/test.F90 create mode 100644 test/pargen/psb_d_oacc_pde3d.F90 diff --git a/test/openacc/Makefile b/test/openacc/Makefile index 65bed1ad..9d62f3cd 100644 --- a/test/openacc/Makefile +++ b/test/openacc/Makefile @@ -9,7 +9,7 @@ INCDIR=$(TOPDIR)/include MODDIR=$(TOPDIR)/modules EXEDIR=./runs -PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lopenblas -lmetis +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lpsb_krylov -lpsb_prec -lopenblas -lmetis LDLIBS=$(PSBGPULDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) @@ -17,7 +17,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70" CFLAGS=-O0 -march=native -SRCS=test.F90 vectoacc.F90 datavect.F90 +SRCS=vectoacc.F90 datavect.F90 CSRC=timers.c OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o) diff --git a/test/openacc/test.F90 b/test/openacc/test.F90 deleted file mode 100644 index 0d0b756f..00000000 --- a/test/openacc/test.F90 +++ /dev/null @@ -1,617 +0,0 @@ -module psb_d_pde3d_mod - - - use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& - & psb_dspmat_type, psb_d_vect_type, dzero,& - & psb_d_base_sparse_mat, psb_d_base_vect_type, & - & psb_i_base_vect_type, psb_l_base_vect_type - - interface - function d_func_3d(x,y,z) result(val) - import :: psb_dpk_ - real(psb_dpk_), intent(in) :: x,y,z - real(psb_dpk_) :: val - end function d_func_3d - end interface - - interface psb_gen_pde3d - module procedure psb_d_gen_pde3d - end interface psb_gen_pde3d - - contains - - function d_null_func_3d(x,y,z) result(val) - - real(psb_dpk_), intent(in) :: x,y,z - real(psb_dpk_) :: val - - val = dzero - - end function d_null_func_3d - ! - ! functions parametrizing the differential equation - ! - - ! - ! Note: b1, b2 and b3 are the coefficients of the first - ! derivative of the unknown function. The default - ! we apply here is to have them zero, so that the resulting - ! matrix is symmetric/hermitian and suitable for - ! testing with CG and FCG. - ! When testing methods for non-hermitian matrices you can - ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) - ! - function b1(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: b1 - real(psb_dpk_), intent(in) :: x,y,z - b1=done/sqrt((3*done)) - end function b1 - function b2(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: b2 - real(psb_dpk_), intent(in) :: x,y,z - b2=done/sqrt((3*done)) - end function b2 - function b3(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: b3 - real(psb_dpk_), intent(in) :: x,y,z - b3=done/sqrt((3*done)) - end function b3 - function c(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: c - real(psb_dpk_), intent(in) :: x,y,z - c=dzero - end function c - function a1(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: a1 - real(psb_dpk_), intent(in) :: x,y,z - a1=done/80 - end function a1 - function a2(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: a2 - real(psb_dpk_), intent(in) :: x,y,z - a2=done/80 - end function a2 - function a3(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: a3 - real(psb_dpk_), intent(in) :: x,y,z - a3=done/80 - end function a3 - function g(x,y,z) - use psb_base_mod, only : psb_dpk_, done, dzero - implicit none - real(psb_dpk_) :: g - real(psb_dpk_), intent(in) :: x,y,z - g = dzero - if (x == done) then - g = done - else if (x == dzero) then - g = exp(y**2-z**2) - end if - end function g - - - ! - ! subroutine to allocate and fill in the coefficient matrix and - ! the rhs. - ! - subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& - & f,amold,vmold,imold,partition,nrl,iv,tnd) - use psb_base_mod - use psb_util_mod - ! - ! Discretizes the partial differential equation - ! - ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) - ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f - ! dxdx dydy dzdz dx dy dz - ! - ! with Dirichlet boundary conditions - ! u = g - ! - ! on the unit cube 0<=x,y,z<=1. - ! - ! - ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. - ! - implicit none - integer(psb_ipk_) :: idim - type(psb_dspmat_type) :: a - type(psb_d_vect_type) :: xv,bv - type(psb_desc_type) :: desc_a - type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: info - character(len=*) :: afmt - procedure(d_func_3d), optional :: f - class(psb_d_base_sparse_mat), optional :: amold - class(psb_d_base_vect_type), optional :: vmold - class(psb_i_base_vect_type), optional :: imold - integer(psb_ipk_), optional :: partition, nrl,iv(:) - logical, optional :: tnd - ! Local variables. - - integer(psb_ipk_), parameter :: nb=20 - type(psb_d_csc_sparse_mat) :: acsc - type(psb_d_coo_sparse_mat) :: acoo - type(psb_d_csr_sparse_mat) :: acsr - real(psb_dpk_) :: zt(nb),x,y,z - integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_ - integer(psb_lpk_) :: m,n,glob_row,nt - integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner - ! For 3D partition - ! Note: integer control variables going directly into an MPI call - ! must be 4 bytes, i.e. psb_mpk_ - integer(psb_mpk_) :: npdims(3), npp, minfo - integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz - integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) - ! Process grid - integer(psb_ipk_) :: np, iam - integer(psb_ipk_) :: icoeff - integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:) - real(psb_dpk_), allocatable :: val(:) - ! deltah dimension of each grid cell - ! deltat discretization time - real(psb_dpk_) :: deltah, sqdeltah, deltah2 - real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero - real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb - integer(psb_ipk_) :: err_act - procedure(d_func_3d), pointer :: f_ - logical :: tnd_ - character(len=20) :: name, ch_err,tmpfmt - - info = psb_success_ - name = 'create_matrix' - call psb_erractionsave(err_act) - - call psb_info(ctxt, iam, np) - - - if (present(f)) then - f_ => f - else - f_ => d_null_func_3d - end if - - deltah = done/(idim+2) - sqdeltah = deltah*deltah - deltah2 = (2*done)* deltah - - if (present(partition)) then - if ((1<= partition).and.(partition <= 3)) then - partition_ = partition - else - write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' - partition_ = 3 - end if - else - partition_ = 3 - end if - - ! initialize array descriptor and sparse matrix storage. provide an - ! estimate of the number of non zeroes - - m = (1_psb_lpk_*idim)*idim*idim - n = m - nnz = ((n*7)/(np)) - if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n - t0 = psb_wtime() - select case(partition_) - case(1) - ! A BLOCK partition - if (present(nrl)) then - nr = nrl - else - ! - ! Using a simple BLOCK distribution. - ! - nt = (m+np-1)/np - nr = max(0,min(nt,m-(iam*nt))) - end if - - nt = nr - call psb_sum(ctxt,nt) - if (nt /= m) then - write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m - info = -1 - call psb_barrier(ctxt) - call psb_abort(ctxt) - return - end if - - ! - ! First example of use of CDALL: specify for each process a number of - ! contiguous rows - ! - call psb_cdall(ctxt,desc_a,info,nl=nr) - myidx = desc_a%get_global_indices() - nlr = size(myidx) - - case(2) - ! A partition defined by the user through IV - - if (present(iv)) then - if (size(iv) /= m) then - write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m - info = -1 - call psb_barrier(ctxt) - call psb_abort(ctxt) - return - end if - else - write(psb_err_unit,*) iam, 'Initialization error: IV not present' - info = -1 - call psb_barrier(ctxt) - call psb_abort(ctxt) - return - end if - - ! - ! Second example of use of CDALL: specify for each row the - ! process that owns it - ! - call psb_cdall(ctxt,desc_a,info,vg=iv) - myidx = desc_a%get_global_indices() - nlr = size(myidx) - - case(3) - ! A 3-dimensional partition - - ! A nifty MPI function will split the process list - npdims = 0 - call mpi_dims_create(np,3,npdims,info) - npx = npdims(1) - npy = npdims(2) - npz = npdims(3) - - allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) - ! We can reuse idx2ijk for process indices as well. - call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) - ! Now let's split the 3D cube in hexahedra - call dist1Didx(bndx,idim,npx) - mynx = bndx(iamx+1)-bndx(iamx) - call dist1Didx(bndy,idim,npy) - myny = bndy(iamy+1)-bndy(iamy) - call dist1Didx(bndz,idim,npz) - mynz = bndz(iamz+1)-bndz(iamz) - - ! How many indices do I own? - nlr = mynx*myny*mynz - allocate(myidx(nlr)) - ! Now, let's generate the list of indices I own - nr = 0 - do i=bndx(iamx),bndx(iamx+1)-1 - do j=bndy(iamy),bndy(iamy+1)-1 - do k=bndz(iamz),bndz(iamz+1)-1 - nr = nr + 1 - call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) - end do - end do - end do - if (nr /= nlr) then - write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& - & nr,nlr,mynx,myny,mynz - info = -1 - call psb_barrier(ctxt) - call psb_abort(ctxt) - end if - - ! - ! Third example of use of CDALL: specify for each process - ! the set of global indices it owns. - ! - call psb_cdall(ctxt,desc_a,info,vl=myidx) - - case default - write(psb_err_unit,*) iam, 'Initialization error: should not get here' - info = -1 - call psb_barrier(ctxt) - call psb_abort(ctxt) - return - end select - - - if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,& - & dupl=psb_dupl_err_) - ! define rhs from boundary conditions; also build initial guess - if (info == psb_success_) call psb_geall(xv,desc_a,info) - if (info == psb_success_) call psb_geall(bv,desc_a,info) - - call psb_barrier(ctxt) - talc = psb_wtime()-t0 - - if (info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='allocation rout.' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - ! we build an auxiliary matrix consisting of one row at a - ! time; just a small matrix. might be extended to generate - ! a bunch of rows per call. - ! - allocate(val(20*nb),irow(20*nb),& - &icol(20*nb),stat=info) - if (info /= psb_success_ ) then - info=psb_err_alloc_dealloc_ - call psb_errpush(info,name) - goto 9999 - endif - - - ! loop over rows belonging to current process in a block - ! distribution. - - call psb_barrier(ctxt) - t1 = psb_wtime() - do ii=1, nlr,nb - ib = min(nb,nlr-ii+1) - icoeff = 1 - do k=1,ib - i=ii+k-1 - ! local matrix pointer - glob_row=myidx(i) - ! compute gridpoint coordinates - call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) - ! x, y, z coordinates - x = (ix-1)*deltah - y = (iy-1)*deltah - z = (iz-1)*deltah - zt(k) = f_(x,y,z) - ! internal point: build discretization - ! - ! term depending on (x-1,y,z) - ! - val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 - if (ix == 1) then - zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) - else - call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - endif - ! term depending on (x,y-1,z) - val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 - if (iy == 1) then - zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) - else - call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - endif - ! term depending on (x,y,z-1) - val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 - if (iz == 1) then - zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) - else - call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - endif - - ! term depending on (x,y,z) - val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & - & + c(x,y,z) - call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - ! term depending on (x,y,z+1) - val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 - if (iz == idim) then - zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) - else - call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - endif - ! term depending on (x,y+1,z) - val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 - if (iy == idim) then - zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) - else - call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - endif - ! term depending on (x+1,y,z) - val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 - if (ix==idim) then - zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) - else - call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) - irow(icoeff) = glob_row - icoeff = icoeff+1 - endif - - end do - call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) - if(info /= psb_success_) exit - call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) - if(info /= psb_success_) exit - zt(:)=dzero - call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) - if(info /= psb_success_) exit - end do - - tgen = psb_wtime()-t1 - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='insert rout.' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - - deallocate(val,irow,icol) - - call psb_barrier(ctxt) - t1 = psb_wtime() - call psb_cdasb(desc_a,info,mold=imold) - tcdasb = psb_wtime()-t1 - call psb_barrier(ctxt) - t1 = psb_wtime() - if (info == psb_success_) then - if (present(amold)) then - call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd) - else - call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd) - end if - end if - call psb_barrier(ctxt) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='asb rout.' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) - if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) - if(info /= psb_success_) then - info=psb_err_from_subroutine_ - ch_err='asb rout.' - call psb_errpush(info,name,a_err=ch_err) - goto 9999 - end if - tasb = psb_wtime()-t1 - call psb_barrier(ctxt) - ttot = psb_wtime() - t0 - - call psb_amx(ctxt,talc) - call psb_amx(ctxt,tgen) - call psb_amx(ctxt,tasb) - call psb_amx(ctxt,ttot) - if(iam == psb_root_) then - tmpfmt = a%get_fmt() - write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& - & tmpfmt - write(psb_out_unit,'("-allocation time : ",es12.5)') talc - write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen - write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb - write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb - write(psb_out_unit,'("-total time : ",es12.5)') ttot - - end if - call psb_erractionrestore(err_act) - return - - 9999 call psb_error_handler(ctxt,err_act) - - return - end subroutine psb_d_gen_pde3d - - -end module psb_d_pde3d_mod - - - -program test - use psb_base_mod - use psb_ext_mod - use psb_oacc_mod - use psb_d_pde3d_mod - - implicit none - integer(psb_ipk_) :: n, i, info, m, nrm, nz - integer(psb_ipk_), parameter :: ntests=80, ngpu=20 - real(psb_dpk_) :: dot_dev, dot_host - type(psb_d_vect_oacc) :: tx, ty - type(psb_d_oacc_csr_sparse_mat) :: aacsr - real(psb_dpk_) :: t0, t1, t2, t3, csflp, elflp - double precision, external :: etime - - type(psb_dspmat_type) :: a - type(psb_desc_type) :: desc_a - type(psb_d_vect_type) :: xxv, bv - type(psb_d_csr_sparse_mat) :: acsr - character(len=5) :: afmt='csr' - real(psb_dpk_), allocatable :: vv(:), ydev(:), yhost(:) - type(psb_ctxt_type) :: ctxt - integer(psb_ipk_) :: iam, np, nth, idim - integer(psb_epk_) :: neq - - call psb_init(ctxt) - call psb_info(ctxt, iam, np) - - write(*,*) 'Enter size :' - read(*,*) idim - idim = max(1, idim) - - n = idim**3 - call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info) - call a%cp_to(acsr) - m = acsr%get_nrows() - n = acsr%get_ncols() - nz = acsr%get_nzeros() - call aacsr%all(m, n, nz, info) - aacsr%val = (acsr%val) - aacsr%ja = (acsr%ja) - aacsr%irp = (acsr%irp) - call aacsr%set_host() - call aacsr%sync() - - call initialize(n) - - call to_host() - t2 = etime() - do i = 1, ntests - dot_host = h_dot(n) - end do - t3 = etime() - - call tx%all(n, info) - call ty%all(n, info) - vv = bv%get_vect() - call bv%set_vect(v1) - call tx%set_vect(v1) - call ty%set_vect(v2) - t0 = etime() - do i = 1, ntests * ngpu - dot_dev = tx%dot_v(n, ty) - end do - !$acc wait - t1 = etime() - write(*,*) ' Dot Results : dev:', dot_dev, ' host:', dot_host - write(*,*) ' Timing : dev:', t1 - t0, (t1 - t0) / (ntests * ngpu), & - ' host:', t3 - t2, (t3 - t2) / ntests - - call a%mv_from(acsr) - t2 = etime() - do i = 1, ntests - call a%spmm(done, bv, dzero, xxv, info) - end do - t3 = etime() - yhost = xxv%get_vect() - t0 = etime() - do i = 1, ntests * ngpu - call aacsr%vect_mv(done, tx, dzero, ty, info) - end do - !$acc wait - t1 = etime() - ydev = ty%get_vect() - write(*,*) 'Correctness check: ', maxval(abs(ydev(:) - yhost(:))) - write(*,*) ' CSR PROD ' - write(*, '(2(a,f12.3,2x))') ' Timing (ms): ' - csflp = 2.d0 * nz / ((t1 - t0) / (ntests * ngpu)) - write(*, '(2(a,f12.3,2x))') ' dev:', 1e3 * (t1 - t0) / (ntests * ngpu), ' :', csflp / 1.d6 - csflp = 2.d0 * nz / ((t3 - t2) / (ntests)) - write(*, '(2(a,f12.3,2x))') ' host:', 1e3 * (t3 - t2) / ntests, ' :', csflp / 1.d6 - write(*,*) 'Done' - - call tx%free(info) - call ty%free(info) - call finalize_dev() - call finalize_host() - call psb_exit(ctxt) -end program test diff --git a/test/pargen/Makefile b/test/pargen/Makefile index 20a95c0b..9b720ac2 100644 --- a/test/pargen/Makefile +++ b/test/pargen/Makefile @@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(INSTALLDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such @@ -25,6 +25,10 @@ psb_d_pde3d: psb_d_pde3d.o $(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_d_pde3d $(EXEDIR) +psb_d_oacc_pde3d: + mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o + $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_d_oacc_pde3d $(EXEDIR) psb_s_pde3d: psb_s_pde3d.o $(FLINK) psb_s_pde3d.o -o psb_s_pde3d $(PSBLAS_LIB) $(LDLIBS) @@ -41,7 +45,7 @@ psb_s_pde2d: psb_s_pde2d.o clean: - /bin/rm -f psb_d_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \ + /bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \ $(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d verycleanlib: (cd ../..; make veryclean) diff --git a/test/pargen/psb_d_oacc_pde3d.F90 b/test/pargen/psb_d_oacc_pde3d.F90 new file mode 100644 index 00000000..ac992884 --- /dev/null +++ b/test/pargen/psb_d_oacc_pde3d.F90 @@ -0,0 +1,1073 @@ +! +! Parallel Sparse BLAS version 3.5 +! (C) Copyright 2006-2018 +! Salvatore Filippone +! Alfredo Buttari +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions +! are met: +! 1. Redistributions of source code must retain the above copyright +! notice, this list of conditions and the following disclaimer. +! 2. Redistributions in binary form must reproduce the above copyright +! notice, this list of conditions, and the following disclaimer in the +! documentation and/or other materials provided with the distribution. +! 3. The name of the PSBLAS group or the names of its contributors may +! not be used to endorse or promote products derived from this +! software without specific written permission. +! +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS +! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +! POSSIBILITY OF SUCH DAMAGE. +! +! +! File: psb_d_pde3d.f90 +! +! Program: psb_d_pde3d +! This sample program solves a linear system obtained by discretizing a +! PDE with Dirichlet BCs. +! +! +! The PDE is a general second order equation in 3d +! +! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) +! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f +! dxdx dydy dzdz dx dy dz +! +! with Dirichlet boundary conditions +! u = g +! +! on the unit cube 0<=x,y,z<=1. +! +! +! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. +! +! There are three choices available for data distribution: +! 1. A simple BLOCK distribution +! 2. A ditribution based on arbitrary assignment of indices to processes, +! typically from a graph partitioner +! 3. A 3D distribution in which the unit cube is partitioned +! into subcubes, each one assigned to a process. +! +! +module psb_d_pde3d_mod + + + use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,& + & psb_dspmat_type, psb_d_vect_type, dzero,& + & psb_d_base_sparse_mat, psb_d_base_vect_type, & + & psb_i_base_vect_type, psb_l_base_vect_type + + interface + function d_func_3d(x,y,z) result(val) + import :: psb_dpk_ + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + end function d_func_3d + end interface + + interface psb_gen_pde3d + module procedure psb_d_gen_pde3d + end interface psb_gen_pde3d + +contains + + function d_null_func_3d(x,y,z) result(val) + + real(psb_dpk_), intent(in) :: x,y,z + real(psb_dpk_) :: val + + val = dzero + + end function d_null_func_3d + ! + ! functions parametrizing the differential equation + ! + + ! + ! Note: b1, b2 and b3 are the coefficients of the first + ! derivative of the unknown function. The default + ! we apply here is to have them zero, so that the resulting + ! matrix is symmetric/hermitian and suitable for + ! testing with CG and FCG. + ! When testing methods for non-hermitian matrices you can + ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done)) + ! + function b1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b1 + real(psb_dpk_), intent(in) :: x,y,z + b1=dzero + end function b1 + function b2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b2 + real(psb_dpk_), intent(in) :: x,y,z + b2=dzero + end function b2 + function b3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: b3 + real(psb_dpk_), intent(in) :: x,y,z + b3=dzero + end function b3 + function c(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: c + real(psb_dpk_), intent(in) :: x,y,z + c=dzero + end function c + function a1(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a1 + real(psb_dpk_), intent(in) :: x,y,z + a1=done/80 + end function a1 + function a2(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a2 + real(psb_dpk_), intent(in) :: x,y,z + a2=done/80 + end function a2 + function a3(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: a3 + real(psb_dpk_), intent(in) :: x,y,z + a3=done/80 + end function a3 + function g(x,y,z) + use psb_base_mod, only : psb_dpk_, done, dzero + implicit none + real(psb_dpk_) :: g + real(psb_dpk_), intent(in) :: x,y,z + g = dzero + if (x == done) then + g = done + else if (x == dzero) then + g = exp(y**2-z**2) + end if + end function g + + + ! + ! subroutine to allocate and fill in the coefficient matrix and + ! the rhs. + ! + subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,& + & f,amold,vmold,imold,partition,nrl,iv) + use psb_base_mod + use psb_util_mod +#if defined(OPENMP) + use omp_lib +#endif + ! + ! Discretizes the partial differential equation + ! + ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u) + ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f + ! dxdx dydy dzdz dx dy dz + ! + ! with Dirichlet boundary conditions + ! u = g + ! + ! on the unit cube 0<=x,y,z<=1. + ! + ! + ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation. + ! + implicit none + integer(psb_ipk_) :: idim + type(psb_dspmat_type) :: a + type(psb_d_vect_type) :: xv,bv + type(psb_desc_type) :: desc_a + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: info + character(len=*) :: afmt + procedure(d_func_3d), optional :: f + class(psb_d_base_sparse_mat), optional :: amold + class(psb_d_base_vect_type), optional :: vmold + class(psb_i_base_vect_type), optional :: imold + integer(psb_ipk_), optional :: partition, nrl,iv(:) + + ! Local variables. + + integer(psb_ipk_), parameter :: nb=20 + type(psb_d_csc_sparse_mat) :: acsc + type(psb_d_coo_sparse_mat) :: acoo + type(psb_d_csr_sparse_mat) :: acsr + real(psb_dpk_) :: zt(nb),x,y,z + integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_, mysz + integer(psb_lpk_) :: m,n,glob_row,nt + integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner + ! For 3D partition + ! Note: integer control variables going directly into an MPI call + ! must be 4 bytes, i.e. psb_mpk_ + integer(psb_mpk_) :: npdims(3), npp, minfo + integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz + integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:) + ! Process grid + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: icoeff + integer(psb_lpk_), allocatable :: myidx(:) + ! deltah dimension of each grid cell + ! deltat discretization time + real(psb_dpk_) :: deltah, sqdeltah, deltah2 + real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero + real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb + integer(psb_ipk_) :: err_act + procedure(d_func_3d), pointer :: f_ + character(len=20) :: name, ch_err,tmpfmt + + info = psb_success_ + name = 'create_matrix' + call psb_erractionsave(err_act) + + call psb_info(ctxt, iam, np) + + + if (present(f)) then + f_ => f + else + f_ => d_null_func_3d + end if + + deltah = done/(idim+1) + sqdeltah = deltah*deltah + deltah2 = (2*done)* deltah + + if (present(partition)) then + if ((1<= partition).and.(partition <= 3)) then + partition_ = partition + else + write(*,*) 'Invalid partition choice ',partition,' defaulting to 3' + partition_ = 3 + end if + else + partition_ = 3 + end if + + ! initialize array descriptor and sparse matrix storage. provide an + ! estimate of the number of non zeroes + + m = (1_psb_lpk_*idim)*idim*idim + n = m + nnz = ((n*7)/(np)) + if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n + t0 = psb_wtime() + select case(partition_) + case(1) + ! A BLOCK partition + if (present(nrl)) then + nr = nrl + else + ! + ! Using a simple BLOCK distribution. + ! + nt = (m+np-1)/np + nr = max(0,min(nt,m-(iam*nt))) + end if + + nt = nr + call psb_sum(ctxt,nt) + if (nt /= m) then + write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! First example of use of CDALL: specify for each process a number of + ! contiguous rows + ! + call psb_cdall(ctxt,desc_a,info,nl=nr) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(2) + ! A partition defined by the user through IV + + if (present(iv)) then + if (size(iv) /= m) then + write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + else + write(psb_err_unit,*) iam, 'Initialization error: IV not present' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end if + + ! + ! Second example of use of CDALL: specify for each row the + ! process that owns it + ! + call psb_cdall(ctxt,desc_a,info,vg=iv) + myidx = desc_a%get_global_indices() + nlr = size(myidx) + + case(3) + ! A 3-dimensional partition + + ! A nifty MPI function will split the process list + npdims = 0 +#if defined(SERIAL_MPI) + npdims = 1 +#else + call mpi_dims_create(np,3,npdims,info) +#endif + npx = npdims(1) + npy = npdims(2) + npz = npdims(3) + + allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz)) + ! We can reuse idx2ijk for process indices as well. + call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0) + ! Now let's split the 3D cube in hexahedra + call dist1Didx(bndx,idim,npx) + mynx = bndx(iamx+1)-bndx(iamx) + call dist1Didx(bndy,idim,npy) + myny = bndy(iamy+1)-bndy(iamy) + call dist1Didx(bndz,idim,npz) + mynz = bndz(iamz+1)-bndz(iamz) + + ! How many indices do I own? + nlr = mynx*myny*mynz + allocate(myidx(nlr)) + ! Now, let's generate the list of indices I own + nr = 0 + do i=bndx(iamx),bndx(iamx+1)-1 + do j=bndy(iamy),bndy(iamy+1)-1 + do k=bndz(iamz),bndz(iamz+1)-1 + nr = nr + 1 + call ijk2idx(myidx(nr),i,j,k,idim,idim,idim) + end do + end do + end do + if (nr /= nlr) then + write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',& + & nr,nlr,mynx,myny,mynz + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + end if + + ! + ! Third example of use of CDALL: specify for each process + ! the set of global indices it owns. + ! + call psb_cdall(ctxt,desc_a,info,vl=myidx) + + ! + ! Specify process topology + ! + block + ! + ! Use adjcncy methods + ! + integer(psb_mpk_), allocatable :: neighbours(:) + integer(psb_mpk_) :: cnt + logical, parameter :: debug_adj=.true. + if (debug_adj.and.(np > 1)) then + cnt = 0 + allocate(neighbours(np)) + if (iamx < npx-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0) + end if + if (iamy < npy-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0) + end if + if (iamz < npz-1) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0) + end if + if (iamx >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0) + end if + if (iamy >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0) + end if + if (iamz >0) then + cnt = cnt + 1 + call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0) + end if + call psb_realloc(cnt, neighbours,info) + call desc_a%set_p_adjcncy(neighbours) + !write(0,*) iam,' Check on neighbours: ',desc_a%get_p_adjcncy() + end if + end block + + case default + write(psb_err_unit,*) iam, 'Initialization error: should not get here' + info = -1 + call psb_barrier(ctxt) + call psb_abort(ctxt) + return + end select + + + if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz, & + & bldmode=psb_matbld_remote_,dupl=psb_dupl_add_) + ! define rhs from boundary conditions; also build initial guess + if (info == psb_success_) call psb_geall(xv,desc_a,info) + if (info == psb_success_) call psb_geall(bv,desc_a,info,& + & bldmode=psb_matbld_remote_,dupl=psb_dupl_add_) + + call psb_barrier(ctxt) + talc = psb_wtime()-t0 + + if (info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='allocation rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + call psb_barrier(ctxt) + t1 = psb_wtime() + !$omp parallel shared(deltah,myidx,a,desc_a) + ! + block + integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth + integer(psb_lpk_) :: glob_row + integer(psb_lpk_), allocatable :: irow(:),icol(:) + real(psb_dpk_), allocatable :: val(:) + real(psb_dpk_) :: x,y,z, zt(nb) +#if defined(OPENMP) + nth = omp_get_num_threads() + ith = omp_get_thread_num() +#else + nth = 1 + ith = 0 +#endif + allocate(val(20*nb),irow(20*nb),& + &icol(20*nb),stat=info) + if (info /= psb_success_ ) then + info=psb_err_alloc_dealloc_ + call psb_errpush(info,name) + !goto 9999 + endif + + !$omp do schedule(dynamic) + ! + do ii=1, nlr, nb + if(info /= psb_success_) cycle + ib = min(nb,nlr-ii+1) + !ib = min(nb,mysz-ii+1) + icoeff = 1 + do k=1,ib + i=ii+k-1 + ! local matrix pointer + glob_row=myidx(i) + ! compute gridpoint coordinates + call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim) + ! x, y, z coordinates + x = (ix-1)*deltah + y = (iy-1)*deltah + z = (iz-1)*deltah + zt(k) = f_(x,y,z) + ! internal point: build discretization + ! + ! term depending on (x-1,y,z) + ! + val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2 + if (ix == 1) then + zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y-1,z) + val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2 + if (iy == 1) then + zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y,z-1) + val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2 + if (iz == 1) then + zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + ! term depending on (x,y,z) + val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah & + & + c(x,y,z) + call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + ! term depending on (x,y,z+1) + val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2 + if (iz == idim) then + zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x,y+1,z) + val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2 + if (iy == idim) then + zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + ! term depending on (x+1,y,z) + val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2 + if (ix==idim) then + zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k) + else + call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim) + irow(icoeff) = glob_row + icoeff = icoeff+1 + endif + + end do +#if defined(OPENMP) +!!$ write(0,*) omp_get_thread_num(),' Check insertion ',& +!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1) +#endif + call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info) + if(info /= psb_success_) cycle + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info) + if(info /= psb_success_) cycle + zt(:)=dzero + call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info) + if(info /= psb_success_) cycle + end do + !$omp end do + deallocate(val,irow,icol) + end block + !$omp end parallel + + tgen = psb_wtime()-t1 + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='insert rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + + + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_cdasb(desc_a,info,mold=imold) + tcdasb = psb_wtime()-t1 + + call psb_barrier(ctxt) + t1 = psb_wtime() + if (info == psb_success_) then + if (present(amold)) then + call psb_spasb(a,desc_a,info,mold=amold) + else + call psb_spasb(a,desc_a,info,afmt=afmt) + end if + end if + call psb_barrier(ctxt) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold) + if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold) + if(info /= psb_success_) then + info=psb_err_from_subroutine_ + ch_err='asb rout.' + call psb_errpush(info,name,a_err=ch_err) + goto 9999 + end if + tasb = psb_wtime()-t1 + call psb_barrier(ctxt) + ttot = psb_wtime() - t0 + + call psb_amx(ctxt,talc) + call psb_amx(ctxt,tgen) + call psb_amx(ctxt,tasb) + call psb_amx(ctxt,ttot) + if(iam == psb_root_) then + tmpfmt = a%get_fmt() + write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')& + & tmpfmt + write(psb_out_unit,'("-allocation time : ",es12.5)') talc + write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen + write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb + write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb + write(psb_out_unit,'("-total time : ",es12.5)') ttot + + end if + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(ctxt,err_act) + + return + end subroutine psb_d_gen_pde3d + function outside(i,j,k,bndx,bndy,bndz,iamx,iamy,iamz) result(res) + logical :: res + integer(psb_ipk_), intent(in) :: i,j,k,iamx,iamy,iamz + integer(psb_ipk_), intent(in) :: bndx(0:),bndy(0:),bndz(0:) + + res = (i=bndx(iamx+1)) & + & .or.(j=bndy(iamy+1)) & + & .or.(k=bndz(iamz+1)) + end function outside +end module psb_d_pde3d_mod + +program psb_d_oacc_pde3d + use psb_base_mod + use psb_prec_mod + use psb_krylov_mod + use psb_util_mod + use psb_d_pde3d_mod +#if defined(OPENACC) + use psb_oacc_mod +#endif + implicit none + + ! input parameters + character(len=20) :: kmethd, ptype + character(len=5) :: afmt, agfmt + integer(psb_ipk_) :: idim + integer(psb_epk_) :: system_size + + ! miscellaneous + real(psb_dpk_), parameter :: one = done + real(psb_dpk_) :: t1, t2, tprec + + ! sparse matrix and preconditioner + type(psb_dspmat_type) :: a, agpu + type(psb_dprec_type) :: prec + ! descriptor + type(psb_desc_type) :: desc_a + ! dense vectors + type(psb_d_vect_type), target :: xxv, bv, xg, bg +#ifdef OPENACC + type(psb_d_vect_oacc) :: vmold + type(psb_i_vect_oacc) :: imold + type(psb_d_oacc_csr_sparse_mat) :: acsrg +#endif + real(psb_dpk_), allocatable :: x0(:) + ! parallel environment + type(psb_ctxt_type) :: ctxt + integer(psb_ipk_) :: iam, np, nth + + ! solver parameters + integer(psb_ipk_) :: iter, itmax, itrace, istopc, irst, ipart + integer(psb_epk_) :: amatsize, precsize, descsize, d2size + real(psb_dpk_) :: err, eps + + ! Parameters for solvers in Block-Jacobi preconditioner + type ainvparms + character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale + integer(psb_ipk_) :: fill, inv_fill + real(psb_dpk_) :: thresh, inv_thresh + end type ainvparms + type(ainvparms) :: parms + + ! other variables + integer(psb_ipk_) :: info, i + character(len=20) :: name, ch_err + character(len=40) :: fname + + info = psb_success_ + + call psb_init(ctxt) + call psb_info(ctxt, iam, np) + +#if defined(OPENACC) + call psb_oacc_init(ctxt) +#endif + + nth = 1 + + if (iam < 0) then + ! This should not happen, but just in case + call psb_exit(ctxt) + stop + endif + if (psb_errstatus_fatal()) goto 9999 + name = 'pde3d90_oacc' + call psb_set_errverbosity(itwo) + + ! Hello world + if (iam == psb_root_) then + write(*,*) 'Welcome to PSBLAS version: ', psb_version_string_ + write(*,*) 'This is the ', trim(name), ' sample program' + end if + + ! get parameters + call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms) + + ! allocate and fill in the coefficient matrix, rhs and initial guess + call psb_barrier(ctxt) + t1 = psb_wtime() + call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info, partition = ipart) + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'psb_gen_pde3d' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + if (iam == psb_root_) write(psb_out_unit, '("Overall matrix creation time : ", es12.5)') t2 + if (iam == psb_root_) write(psb_out_unit, '(" ")') + +#ifdef OPENACC + ! Convert matrix to GPU format + call a%cscnv(agpu, info, mold = acsrg) + if ((info /= 0) .or. (psb_get_errstatus() /= 0)) then + write(0,*) 'From cscnv ', info + call psb_error() + stop + end if + call desc_a%cnv(mold = imold) + call psb_geasb(bg, desc_a, info, scratch = .true., mold = vmold) + call psb_geasb(xg, desc_a, info, scratch = .true., mold = vmold) +#endif + + ! prepare the preconditioner. + if (iam == psb_root_) write(psb_out_unit, '("Setting preconditioner to : ", a)') ptype + call prec%init(ctxt, ptype, info) + + ! Set the options for the BJAC preconditioner + if (psb_toupper(ptype) == "BJAC") then + call prec%set('sub_solve', parms%alg, info) + select case (psb_toupper(parms%alg)) + case ("ILU") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('ilu_alg', parms%ilu_alg, info) + case ("ILUT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("AINV") + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + call prec%set('ainv_alg', parms%orth_alg, info) + case ("INVK") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case ("INVT") + call prec%set('sub_fillin', parms%fill, info) + call prec%set('inv_fillin', parms%inv_fill, info) + call prec%set('sub_iluthrs', parms%thresh, info) + call prec%set('inv_thresh', parms%inv_thresh, info) + call prec%set('ilut_scale', parms%ilut_scale, info) + case default + ! Do nothing, use default setting in the init routine + end select + else + ! nothing to set for NONE or DIAG preconditioner + end if + + call psb_barrier(ctxt) + t1 = psb_wtime() + call prec%build(a, desc_a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'psb_precbld' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + + tprec = psb_wtime() - t1 + + call psb_amx(ctxt, tprec) + + if (iam == psb_root_) write(psb_out_unit, '("Preconditioner time : ", es12.5)') tprec + if (iam == psb_root_) write(psb_out_unit, '(" ")') + call prec%descr(info) + + ! iterative method parameters + if (iam == psb_root_) write(psb_out_unit, '("Calling iterative method ", a)') kmethd + call psb_barrier(ctxt) + t1 = psb_wtime() + eps = 1.d-6 + +#ifdef OPENACC + call psb_krylov(kmethd, agpu, prec, bv, xxv, eps, desc_a, info, & + itmax = itmax, iter = iter, err = err, itrace = itrace, istop = istopc, irst = irst) +#else + call psb_krylov(kmethd, a, prec, bv, xxv, eps, desc_a, info, & + itmax = itmax, iter = iter, err = err, itrace = itrace, istop = istopc, irst = irst) +#endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'solver routine' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + + call psb_barrier(ctxt) + t2 = psb_wtime() - t1 + call psb_amx(ctxt, t2) + amatsize = a%sizeof() + descsize = desc_a%sizeof() + precsize = prec%sizeof() + system_size = desc_a%get_global_rows() + call psb_sum(ctxt, amatsize) + call psb_sum(ctxt, descsize) + call psb_sum(ctxt, precsize) + + if (iam == psb_root_) then + write(psb_out_unit, '(" ")') + write(psb_out_unit, '("Number of processes : ", i12)') np + write(psb_out_unit, '("Number of threads : ", i12)') nth + write(psb_out_unit, '("Total number of tasks : ", i12)') nth * np + write(psb_out_unit, '("Linear system size : ", i12)') system_size + write(psb_out_unit, '("Time to solve system : ", es12.5)') t2 + write(psb_out_unit, '("Time per iteration : ", es12.5)') t2 / iter + write(psb_out_unit, '("Number of iterations : ", i12)') iter + write(psb_out_unit, '("Convergence indicator on exit : ", es12.5)') err + write(psb_out_unit, '("Info on exit : ", i12)') info + write(psb_out_unit, '("Total memory occupation for A: ", i12)') amatsize + write(psb_out_unit, '("Total memory occupation for PREC: ", i12)') precsize + write(psb_out_unit, '("Total memory occupation for DESC_A: ", i12)') descsize + write(psb_out_unit, '("Storage format for A: ", a)') a%get_fmt() + write(psb_out_unit, '("Storage format for DESC_A: ", a)') desc_a%get_fmt() + end if + + ! cleanup storage and exit + call psb_gefree(bv, desc_a, info) + call psb_gefree(xxv, desc_a, info) + call psb_spfree(a, desc_a, info) + call prec%free(info) + call psb_cdfree(desc_a, info) + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + ch_err = 'free routine' + call psb_errpush(info, name, a_err = ch_err) + goto 9999 + end if + +#ifdef OPENACC + call psb_oacc_exit() +#endif + call psb_exit(ctxt) + stop + +9999 call psb_error(ctxt) + + stop + +contains + ! get iteration parameters from standard input + subroutine get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms) + type(psb_ctxt_type) :: ctxt + character(len = *) :: kmethd, ptype, afmt, agfmt + integer(psb_ipk_) :: idim, istopc, itmax, itrace, irst, ipart + integer(psb_ipk_) :: np, iam + integer(psb_ipk_) :: ip, inp_unit + character(len = 1024) :: filename + type(ainvparms) :: parms + + call psb_info(ctxt, iam, np) + + if (iam == 0) then + if (command_argument_count() > 0) then + call get_command_argument(1, filename) + inp_unit = 30 + open(inp_unit, file = filename, action = 'read', iostat = info) + if (info /= 0) then + write(psb_err_unit, *) 'Could not open file ', filename, ' for input' + call psb_abort(ctxt) + stop + else + write(psb_err_unit, *) 'Opened file ', trim(filename), ' for input' + end if + else + inp_unit = psb_inp_unit + end if + read(inp_unit, *) ip + if (ip >= 3) then + read(inp_unit, *) kmethd + read(inp_unit, *) ptype + read(inp_unit, *) afmt + read(inp_unit, *) agfmt + read(inp_unit, *) idim + if (ip >= 4) then + read(inp_unit, *) ipart + else + ipart = 3 + endif + if (ip >= 5) then + read(inp_unit, *) istopc + else + istopc = 1 + endif + if (ip >= 6) then + read(inp_unit, *) itmax + else + itmax = 500 + endif + if (ip >= 7) then + read(inp_unit, *) itrace + else + itrace = -1 + endif + if (ip >= 8) then + read(inp_unit, *) irst + else + irst = 1 + endif + if (ip >= 9) then + read(inp_unit, *) parms%alg + read(inp_unit, *) parms%ilu_alg + read(inp_unit, *) parms%ilut_scale + read(inp_unit, *) parms%fill + read(inp_unit, *) parms%inv_fill + read(inp_unit, *) parms%thresh + read(inp_unit, *) parms%inv_thresh + read(inp_unit, *) parms%orth_alg + else + parms%alg = 'ILU' ! Block Solver ILU, ILUT, INVK, AINVT, AORTH + parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE otherwise ignored + parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM + parms%fill = 0 ! Level of fill for forward factorization + parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK) + parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization + parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization + parms%orth_alg = 'LLK' ! What orthogonalization algorithm? + endif + + write(psb_out_unit, '("Solving matrix : ell1")') + write(psb_out_unit, & + '("Grid dimensions : ", i4, " x ", i4, " x ", i4)') & + idim, idim, idim + write(psb_out_unit, '("Number of processors : ", i0)') np + select case (ipart) + case (1) + write(psb_out_unit, '("Data distribution : BLOCK")') + case (3) + write(psb_out_unit, '("Data distribution : 3D")') + case default + ipart = 3 + write(psb_out_unit, '("Unknown data distrbution, defaulting to 3D")') + end select + write(psb_out_unit, '("Preconditioner : ", a)') ptype + if (psb_toupper(ptype) == "BJAC") then + write(psb_out_unit, '("Block subsolver : ", a)') parms%alg + select case (psb_toupper(parms%alg)) + case ('ILU') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("MILU : ", a)') parms%ilu_alg + case ('ILUT') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("Threshold : ", es12.5)') parms%thresh + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case ('INVK') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case ('INVT') + write(psb_out_unit, '("Fill in : ", i0)') parms%fill + write(psb_out_unit, '("Threshold : ", es12.5)') parms%thresh + write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill + write(psb_out_unit, '("Inverse Threshold : ", es12.5)') parms%inv_thresh + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case ('AINV', 'AORTH') + write(psb_out_unit, '("Inverse Threshold : ", es12.5)') parms%inv_thresh + write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill + write(psb_out_unit, '("Orthogonalization : ", a)') parms%orth_alg + write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale + case default + write(psb_out_unit, '("Unknown diagonal solver")') + end select + end if + write(psb_out_unit, '("Iterative method : ", a)') kmethd + write(psb_out_unit, '(" ")') + else + ! wrong number of parameter, print an error message and exit + call pr_usage(izero) + call psb_abort(ctxt) + stop 1 + endif + if (inp_unit /= psb_inp_unit) then + close(inp_unit) + end if + end if + ! broadcast parameters to all processors + call psb_bcast(ctxt, kmethd) + call psb_bcast(ctxt, afmt) + call psb_bcast(ctxt, agfmt) + call psb_bcast(ctxt, ptype) + call psb_bcast(ctxt, idim) + call psb_bcast(ctxt, ipart) + call psb_bcast(ctxt, istopc) + call psb_bcast(ctxt, itmax) + call psb_bcast(ctxt, itrace) + call psb_bcast(ctxt, irst) + call psb_bcast(ctxt, parms%alg) + call psb_bcast(ctxt, parms%fill) + call psb_bcast(ctxt, parms%inv_fill) + call psb_bcast(ctxt, parms%thresh) + call psb_bcast(ctxt, parms%inv_thresh) + call psb_bcast(ctxt, parms%orth_alg) + call psb_bcast(ctxt, parms%ilut_scale) + + return + + end subroutine get_parms + + ! print an error message + subroutine pr_usage(iout) + integer(psb_ipk_) :: iout + write(iout, *) 'incorrect parameter(s) found' + write(iout, *) ' usage: pde3d90 methd prec dim &' + write(iout, *) '[istop itmax itrace]' + write(iout, *) ' where:' + write(iout, *) ' methd: cgstab cgs rgmres bicgstabl' + write(iout, *) ' prec : bjac diag none' + write(iout, *) ' dim number of points along each axis' + write(iout, *) ' the size of the resulting linear ' + write(iout, *) ' system is dim**3' + write(iout, *) ' ipart data partition 1 3 ' + write(iout, *) ' istop stopping criterion 1, 2 ' + write(iout, *) ' itmax maximum number of iterations [500] ' + write(iout, *) ' itrace <=0 (no tracing, default) or ' + write(iout, *) ' >= 1 do tracing every itrace' + write(iout, *) ' iterations ' + end subroutine pr_usage + +end program psb_d_oacc_pde3d From 08a9744413475b756341bbd9cd47e3b28aefd069 Mon Sep 17 00:00:00 2001 From: tloloum Date: Fri, 19 Jul 2024 13:01:07 +0200 Subject: [PATCH 17/39] moving test --- test/openacc/Makefile | 7 ++++++- test/{pargen => openacc}/psb_d_oacc_pde3d.F90 | 0 test/pargen/Makefile | 7 +------ 3 files changed, 7 insertions(+), 7 deletions(-) rename test/{pargen => openacc}/psb_d_oacc_pde3d.F90 (100%) diff --git a/test/openacc/Makefile b/test/openacc/Makefile index 9d62f3cd..a2827b30 100644 --- a/test/openacc/Makefile +++ b/test/openacc/Makefile @@ -9,7 +9,7 @@ INCDIR=$(TOPDIR)/include MODDIR=$(TOPDIR)/modules EXEDIR=./runs -PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lpsb_krylov -lpsb_prec -lopenblas -lmetis +PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base LDLIBS=$(PSBGPULDLIBS) FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES) @@ -39,6 +39,11 @@ dir: %.o: %.c $(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@ +psb_d_oacc_pde3d: + mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o + $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS) + /bin/mv psb_d_oacc_pde3d $(EXEDIR) + clean: /bin/rm -fr *.o *.mod $(EXEDIR)/* diff --git a/test/pargen/psb_d_oacc_pde3d.F90 b/test/openacc/psb_d_oacc_pde3d.F90 similarity index 100% rename from test/pargen/psb_d_oacc_pde3d.F90 rename to test/openacc/psb_d_oacc_pde3d.F90 diff --git a/test/pargen/Makefile b/test/pargen/Makefile index 9b720ac2..c0a1c375 100644 --- a/test/pargen/Makefile +++ b/test/pargen/Makefile @@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas # # Libraries used LIBDIR=$(INSTALLDIR)/lib -PSBLAS_LIB= -L$(LIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base +PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base LDLIBS=$(PSBLDLIBS) # # Compilers and such @@ -25,11 +25,6 @@ psb_d_pde3d: psb_d_pde3d.o $(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_d_pde3d $(EXEDIR) -psb_d_oacc_pde3d: - mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o - $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS) - /bin/mv psb_d_oacc_pde3d $(EXEDIR) - psb_s_pde3d: psb_s_pde3d.o $(FLINK) psb_s_pde3d.o -o psb_s_pde3d $(PSBLAS_LIB) $(LDLIBS) /bin/mv psb_s_pde3d $(EXEDIR) From 162b2fc78fee4bf3596d5098af69ea10e5a3f760 Mon Sep 17 00:00:00 2001 From: tloloum Date: Mon, 22 Jul 2024 15:27:33 +0200 Subject: [PATCH 18/39] beginning of oacc_ell --- openacc/Makefile | 4 +- openacc/psb_d_oacc_ell_mat_mod.F90 | 218 +++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 1 deletion(-) create mode 100644 openacc/psb_d_oacc_ell_mat_mod.F90 diff --git a/openacc/Makefile b/openacc/Makefile index 3a752ac7..cba6b1f7 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -22,6 +22,7 @@ FOBJS= psb_i_oacc_vect_mod.o \ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ + psb_d_oacc_ell_mat_mod.o \ psb_oacc_mod.o psb_oacc_env_mod.o @@ -48,6 +49,7 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o \ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ + psb_d_oacc_ell_mat_mod.o \ psb_oacc_env_mod.o psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ @@ -58,7 +60,7 @@ psb_s_oacc_csr_mat_mod.o: psb_s_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o - +psb_d_oacc_ell_mat_mod.o: psb_d_oacc_vect_mod.o clean: cclean iclean diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..846707af --- /dev/null +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -0,0 +1,218 @@ +module psb_d_oacc_ell_mat_mod + use iso_c_binding + use psb_d_mat_mod + use psb_d_ell_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => d_oacc_ell_sizeof + procedure, pass(a) :: is_host => d_oacc_ell_is_host + procedure, pass(a) :: is_sync => d_oacc_ell_is_sync + procedure, pass(a) :: is_dev => d_oacc_ell_is_dev + procedure, pass(a) :: set_host => d_oacc_ell_set_host + procedure, pass(a) :: set_sync => d_oacc_ell_set_sync + procedure, pass(a) :: set_dev => d_oacc_ell_set_dev + procedure, pass(a) :: sync_space => d_oacc_ell_sync_space + procedure, pass(a) :: sync => d_oacc_ell_sync + procedure, pass(a) :: free => d_oacc_ell_free + + end type psb_d_oacc_ell_sparse_mat + + contains + + subroutine d_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_d_ell_sparse_mat%free() + + return + end subroutine d_oacc_ell_free + + + function d_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function d_oacc_ell_sizeof + + subroutine d_oacc_ell_sync_space(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine d_oacc_ell_sync_space + + + + function d_oacc_ell_is_host(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_ell_is_host + + function d_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_ell_is_sync + + function d_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_ell_is_dev + + subroutine d_oacc_ell_set_host(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_ell_set_host + + subroutine d_oacc_ell_set_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_ell_set_sync + + subroutine d_oacc_ell_set_dev(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_ell_set_dev + + function d_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function d_oacc_ell_get_fmt + + subroutine d_oacc_ell_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call d_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine d_oacc_ell_sync + + subroutine d_oacc_ell_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine d_oacc_ell_to_host + + subroutine d_oacc_ell_to_host_scalar(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar + + +end module psb_d_oacc_ell_mat_mod \ No newline at end of file From 10ec5eafabe405d9ee2ebb1c0be5a9bdca13f9c8 Mon Sep 17 00:00:00 2001 From: tloloum Date: Fri, 26 Jul 2024 09:56:32 +0200 Subject: [PATCH 19/39] ELL oacc impl --- openacc/impl/Makefile | 15 ++- openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 47 +++++++ openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 78 +++++++++++ openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 24 ++++ openacc/impl/psb_d_oacc_ell_csmm.F90 | 86 ++++++++++++ openacc/impl/psb_d_oacc_ell_csmv.F90 | 82 ++++++++++++ openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 | 85 ++++++++++++ openacc/impl/psb_d_oacc_ell_mold.F90 | 34 +++++ openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 25 ++++ openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 24 ++++ openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 | 28 ++++ openacc/impl/psb_d_oacc_ell_scal.F90 | 58 ++++++++ openacc/impl/psb_d_oacc_ell_scals.F90 | 39 ++++++ openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 66 +++++++++ openacc/psb_d_oacc_ell_mat_mod.F90 | 125 ++++++++++++++++++ 15 files changed, 815 insertions(+), 1 deletion(-) create mode 100644 openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_vect_mv.F90 diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 32f104dd..a38c703d 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -70,7 +70,20 @@ psb_z_oacc_csr_mv_from_coo.o \ psb_z_oacc_csr_mv_from_fmt.o \ psb_z_oacc_csr_mold.o \ psb_z_oacc_mlt_v_2.o \ -psb_z_oacc_mlt_v.o +psb_z_oacc_mlt_v.o \ +psb_d_oacc_ell_vect_mv.o \ +psb_d_oacc_ell_inner_vect_sv.o \ +psb_d_oacc_ell_csmm.o \ +psb_d_oacc_ell_csmv.o \ +psb_d_oacc_ell_scals.o \ +psb_d_oacc_ell_scal.o \ +psb_d_oacc_ell_reallocate_nz.o \ +psb_d_oacc_ell_allocate_mnnz.o \ +psb_d_oacc_ell_cp_from_coo.o \ +psb_d_oacc_ell_cp_from_fmt.o \ +psb_d_oacc_ell_mv_from_coo.o \ +psb_d_oacc_ell_mv_from_fmt.o \ +psb_d_oacc_ell_mold.o \ objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..4923e12c --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,47 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_d_ell_sparse_mat%allocate(m, n, nz_) + + if (.not.allocated(a%val)) then + allocate(a%val(m, nz_)) + allocate(a%ja(m, nz_)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + end if + + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_allocate_mnnz +end submodule psb_d_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..560d3451 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,78 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 1 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(a%get_nrows(), nz_per_row)) + allocate(a%ja(a%get_nrows(), nz_per_row)) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + end if + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row, row_counts(row) + 1) = value + a%ja(row, row_counts(row) + 1) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(i, j) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_ell_cp_from_coo +end submodule psb_d_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..b62a40d5 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_d_oacc_ell_cp_from_fmt +end submodule psb_d_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_ell_csmm.F90 b/openacc/impl/psb_d_oacc_ell_csmm.F90 new file mode 100644 index 00000000..6515a306 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmm_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_ell_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = 1, nzt + y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_csmm +end submodule psb_d_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_d_oacc_ell_csmv.F90 b/openacc/impl/psb_d_oacc_ell_csmv.F90 new file mode 100644 index 00000000..8d5e2aaa --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_csmv.F90 @@ -0,0 +1,82 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_ell_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nzt = a%nzt + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, m + do j = 1, nzt + y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_csmv +end submodule psb_d_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..2c5563ce --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_oacc) + select type(yy => y) + type is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_d_oacc_ell_inner_vect_sv +end submodule psb_d_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_d_oacc_ell_mold.F90 b/openacc/impl/psb_d_oacc_ell_mold.F90 new file mode 100644 index 00000000..767e7f13 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mold(a, b, info) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_ell_mold +end submodule psb_d_oacc_ell_mold_impl diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..688d182b --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_ell_mv_from_coo +end submodule psb_d_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..8bda6e6e --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_d_oacc_ell_mv_from_fmt +end submodule psb_d_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..11332472 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_reallocate_nz +end submodule psb_d_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_d_oacc_ell_scal.F90 b/openacc/impl/psb_d_oacc_ell_scal.F90 new file mode 100644 index 00000000..39948d5f --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_scal +end submodule psb_d_oacc_ell_scal_impl diff --git a/openacc/impl/psb_d_oacc_ell_scals.F90 b/openacc/impl/psb_d_oacc_ell_scals.F90 new file mode 100644 index 00000000..a6292b72 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_scals(d, a, info) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_scals +end submodule psb_d_oacc_ell_scals_impl diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..ebcdb405 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_d_oacc_ell_vect_mv +end submodule psb_d_oacc_ell_vect_mv_impl diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 846707af..8c5946ba 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -22,9 +22,134 @@ module psb_d_oacc_ell_mat_mod procedure, pass(a) :: sync_space => d_oacc_ell_sync_space procedure, pass(a) :: sync => d_oacc_ell_sync procedure, pass(a) :: free => d_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv + procedure, pass(a) :: scals => psb_d_oacc_ell_scals + procedure, pass(a) :: scalv => psb_d_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_ell_mold end type psb_d_oacc_ell_sparse_mat + interface + module subroutine psb_d_oacc_ell_mold(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mold + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmm + end interface + + interface + module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmv + end interface + + interface + module subroutine psb_d_oacc_ell_scals(d, a, info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_scals + end interface + + interface + module subroutine psb_d_oacc_ell_scal(d,a,info,side) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_ell_scal + end interface + + interface + module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_coo + end interface + contains subroutine d_oacc_ell_free(a) From e6fa1d17a2f134b792f7968d2a95bb61a4a95e84 Mon Sep 17 00:00:00 2001 From: tloloum Date: Tue, 30 Jul 2024 14:25:01 +0200 Subject: [PATCH 20/39] oacc hll --- openacc/Makefile | 7 +- openacc/impl/Makefile | 13 + openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 53 +++ openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 85 +++++ openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 24 ++ openacc/impl/psb_d_oacc_hll_csmm.F90 | 86 +++++ openacc/impl/psb_d_oacc_hll_csmv.F90 | 83 +++++ openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 | 86 +++++ openacc/impl/psb_d_oacc_hll_mold.F90 | 34 ++ openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 25 ++ openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 24 ++ openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 | 29 ++ openacc/impl/psb_d_oacc_hll_scal.F90 | 59 +++ openacc/impl/psb_d_oacc_hll_scals.F90 | 39 ++ openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 67 ++++ openacc/psb_d_oacc_hll_mat_mod.F90 | 352 ++++++++++++++++++ 16 files changed, 1063 insertions(+), 3 deletions(-) create mode 100644 openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_vect_mv.F90 create mode 100644 openacc/psb_d_oacc_hll_mat_mod.F90 diff --git a/openacc/Makefile b/openacc/Makefile index cba6b1f7..e7e99072 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -22,7 +22,7 @@ FOBJS= psb_i_oacc_vect_mod.o \ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ - psb_d_oacc_ell_mat_mod.o \ + psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o\ psb_oacc_mod.o psb_oacc_env_mod.o @@ -49,7 +49,7 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o \ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ - psb_d_oacc_ell_mat_mod.o \ + psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \ psb_oacc_env_mod.o psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ @@ -61,10 +61,11 @@ psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o psb_d_oacc_ell_mat_mod.o: psb_d_oacc_vect_mod.o +psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o clean: cclean iclean - /bin/rm -f $(FOBJS) *$(.mod) *.a + /bin/rm -f $(FOBJS) *$(.mod) *.a *.smod veryclean: clean cclean: /bin/rm -f $(COBJS) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index a38c703d..c0a157e8 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -84,6 +84,19 @@ psb_d_oacc_ell_cp_from_fmt.o \ psb_d_oacc_ell_mv_from_coo.o \ psb_d_oacc_ell_mv_from_fmt.o \ psb_d_oacc_ell_mold.o \ +psb_d_oacc_hll_mold.o \ +psb_d_oacc_hll_mv_from_fmt.o \ +psb_d_oacc_hll_mv_from_coo.o \ +psb_d_oacc_hll_cp_from_fmt.o \ +psb_d_oacc_hll_cp_from_coo.o \ +psb_d_oacc_hll_allocate_mnnz.o \ +psb_d_oacc_hll_reallocate_nz.o \ +psb_d_oacc_hll_scal.o \ +psb_d_oacc_hll_scals.o \ +psb_d_oacc_hll_csmv.o \ +psb_d_oacc_hll_csmm.o \ +psb_d_oacc_hll_inner_vect_sv.o \ +psb_d_oacc_hll_vect_mv.o \ objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..909ee90b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_d_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_allocate_mnnz +end submodule psb_d_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..fbe793d5 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_hll_cp_from_coo +end submodule psb_d_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..fb99737c --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_d_oacc_hll_cp_from_fmt +end submodule psb_d_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_hll_csmm.F90 b/openacc/impl/psb_d_oacc_hll_csmm.F90 new file mode 100644 index 00000000..2dd6b53b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_csmm +end submodule psb_d_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90 new file mode 100644 index 00000000..504cad19 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_csmv.F90 @@ -0,0 +1,83 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_csmv +end submodule psb_d_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..ae1c3c94 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_oacc) + select type(yy => y) + type is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_d_oacc_hll_inner_vect_sv +end submodule psb_d_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_d_oacc_hll_mold.F90 b/openacc/impl/psb_d_oacc_hll_mold.F90 new file mode 100644 index 00000000..89ead65b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_mold(a, b, info) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_hll_mold +end submodule psb_d_oacc_hll_mold_impl diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..7bf22c13 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_hll_mv_from_coo +end submodule psb_d_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..e6615365 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_d_oacc_hll_mv_from_fmt +end submodule psb_d_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..412409d1 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_reallocate_nz +end submodule psb_d_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_d_oacc_hll_scal.F90 b/openacc/impl/psb_d_oacc_hll_scal.F90 new file mode 100644 index 00000000..50210f4b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_scal.F90 @@ -0,0 +1,59 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_scal +end submodule psb_d_oacc_hll_scal_impl diff --git a/openacc/impl/psb_d_oacc_hll_scals.F90 b/openacc/impl/psb_d_oacc_hll_scals.F90 new file mode 100644 index 00000000..ccb6b1b8 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_scals(d, a, info) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_scals +end submodule psb_d_oacc_hll_scals_impl diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..875b646f --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + real(psb_dpk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_d_oacc_hll_vect_mv +end submodule psb_d_oacc_hll_vect_mv_impl diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..530af94a --- /dev/null +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_d_oacc_hll_mat_mod + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => d_oacc_hll_sizeof + procedure, pass(a) :: is_host => d_oacc_hll_is_host + procedure, pass(a) :: is_sync => d_oacc_hll_is_sync + procedure, pass(a) :: is_dev => d_oacc_hll_is_dev + procedure, pass(a) :: set_host => d_oacc_hll_set_host + procedure, pass(a) :: set_sync => d_oacc_hll_set_sync + procedure, pass(a) :: set_dev => d_oacc_hll_set_dev + procedure, pass(a) :: sync_space => d_oacc_hll_sync_space + procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free => d_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv + procedure, pass(a) :: scals => psb_d_oacc_hll_scals + procedure, pass(a) :: scalv => psb_d_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_hll_mold + + end type psb_d_oacc_hll_sparse_mat + + interface + module subroutine psb_d_oacc_hll_mold(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mold + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmm + end interface + + interface + module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmv + end interface + + interface + module subroutine psb_d_oacc_hll_scals(d, a, info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_scals + end interface + + interface + module subroutine psb_d_oacc_hll_scal(d,a,info,side) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_hll_scal + end interface + + interface + module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_coo + end interface + + contains + + subroutine d_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_d_hll_sparse_mat%free() + + return + end subroutine d_oacc_hll_free + + function d_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function d_oacc_hll_sizeof + + + + function d_oacc_hll_is_host(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_hll_is_host + + function d_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_hll_is_sync + + function d_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_hll_is_dev + + subroutine d_oacc_hll_set_host(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_hll_set_host + + subroutine d_oacc_hll_set_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_hll_set_sync + + subroutine d_oacc_hll_set_dev(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_hll_set_dev + + function d_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function d_oacc_hll_get_fmt + + subroutine d_oacc_hll_sync_space(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine d_oacc_hll_sync_space + + + subroutine d_oacc_hll_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call d_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine d_oacc_hll_sync + + subroutine d_oacc_hll_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_hll_to_host + + subroutine d_oacc_hll_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_d_oacc_hll_mat_mod \ No newline at end of file From 55d1067ec2bed450305073a174b4364ce590129e Mon Sep 17 00:00:00 2001 From: tloloum Date: Tue, 6 Aug 2024 11:42:52 +0200 Subject: [PATCH 21/39] collapse loop --- openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 2 +- openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 2 +- openacc/impl/psb_d_oacc_hll_csmv.F90 | 5 ++++- openacc/impl/psb_d_oacc_hll_scal.F90 | 9 ++++++--- openacc/impl/psb_d_oacc_hll_scals.F90 | 3 ++- 5 files changed, 14 insertions(+), 7 deletions(-) diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 index 560d3451..4e8402e7 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -47,7 +47,7 @@ contains row_counts(row) = row_counts(row) + 1 else info = psb_err_invalid_mat_state_ - goto 9999 + !goto 9999 end if end do diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 index fbe793d5..4a258c74 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -48,7 +48,7 @@ contains row_counts(row) = row_counts(row) + 1 else info = psb_err_invalid_mat_state_ - goto 9999 + !goto 9999 end if end do diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90 index 504cad19..bccd3d6d 100644 --- a/openacc/impl/psb_d_oacc_hll_csmv.F90 +++ b/openacc/impl/psb_d_oacc_hll_csmv.F90 @@ -64,8 +64,11 @@ contains do i = 1, m y(i) = beta * y(i) end do - +#if (__GNUC__ <= 13) + !$acc parallel loop present(a, x, y) +#else !$acc parallel loop collapse(2) present(a, x, y) +#endif do i = 1, nhacks do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) diff --git a/openacc/impl/psb_d_oacc_hll_scal.F90 b/openacc/impl/psb_d_oacc_hll_scal.F90 index 50210f4b..c0c284ef 100644 --- a/openacc/impl/psb_d_oacc_hll_scal.F90 +++ b/openacc/impl/psb_d_oacc_hll_scal.F90 @@ -23,7 +23,8 @@ contains if (present(side)) then if (side == 'L') then - !$acc parallel loop collapse(2) present(a, d) + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) do i = 1, nhacks do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 @@ -31,7 +32,8 @@ contains end do end do else if (side == 'R') then - !$acc parallel loop collapse(2) present(a, d) + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) do i = 1, nhacks do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 a%val(j) = a%val(j) * d(a%ja(j)) @@ -39,7 +41,8 @@ contains end do end if else - !$acc parallel loop collapse(2) present(a, d) + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) do i = 1, nhacks do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) diff --git a/openacc/impl/psb_d_oacc_hll_scals.F90 b/openacc/impl/psb_d_oacc_hll_scals.F90 index ccb6b1b8..1e3457b5 100644 --- a/openacc/impl/psb_d_oacc_hll_scals.F90 +++ b/openacc/impl/psb_d_oacc_hll_scals.F90 @@ -20,7 +20,8 @@ contains nhacks = (a%get_nrows() + hksz - 1) / hksz nzt = a%nzt - !$acc parallel loop collapse(2) present(a) + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) do i = 1, nhacks do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 a%val(j) = a%val(j) * d From a28d3b048b38248729dd9d59388553138cb71fb3 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 7 Aug 2024 10:30:11 +0200 Subject: [PATCH 22/39] Fix configure for OpenACC (added warning message) --- config/pac.m4 | 50 -------------------------------------------------- configure | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++- configure.ac | 12 +++++++++++- 3 files changed, 61 insertions(+), 52 deletions(-) diff --git a/config/pac.m4 b/config/pac.m4 index c23de4fb..83356540 100644 --- a/config/pac.m4 +++ b/config/pac.m4 @@ -2234,56 +2234,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo [pac_cv_cudacc='']) ]) -AC_DEFUN(PAC_ARG_WITH_LIBRSB, - [SAVE_LIBS="$LIBS" - SAVE_CPPFLAGS="$CPPFLAGS" - - AC_ARG_WITH(librsb, - AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example, - --with-librsb=/opt/packages/librsb]), - [pac_cv_librsb_dir=$withval], - [pac_cv_librsb_dir='']) - - if test "x$pac_cv_librsb_dir" != "x"; then - LIBS="-L$pac_cv_librsb_dir $LIBS" - RSB_INCLUDES="-I$pac_cv_librsb_dir" - # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS" - RSB_LIBDIR="-L$pac_cv_librsb_dir" - fi - #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir]) - AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h], - [pac_rsb_header_ok=yes], - [pac_rsb_header_ok=no; RSB_INCLUDES=""]) - - if test "x$pac_rsb_header_ok" == "xyes" ; then - RSB_LIBS="-lrsb $RSB_LIBDIR" - # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS"; - # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS]) - # AC_TRY_LINK_FUNC(spgpuCreate, - # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ], - # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""]) - # AC_MSG_RESULT($pac_gpu_lib_ok) - # if test "x$pac_cv_have_spgpu" == "xyes" ; then - # AC_MSG_NOTICE([Have found SPGPU]) - RSBLIBNAME="librsb.a"; - LIBRSB_DIR="$pac_cv_librsb_dir"; - # SPGPU_DEFINES="-DHAVE_SPGPU"; - LIBRSB_INCDIR="$LIBRSB_DIR"; - LIBRSB_INCLUDES="-I$LIBRSB_INCDIR"; - LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR"; - # CUDA_DIR="$pac_cv_cuda_dir"; - LIBRSB_DEFINES="-DHAVE_RSB"; - LRSB=-lpsb_rsb - # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include" - # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib" - FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES"; - CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES"; - fi -# fi -LIBS="$SAVE_LIBS" -CPPFLAGS="$SAVE_CPPFLAGS" -]) -dnl dnl @synopsis PAC_CHECK_CUDA_VERSION dnl diff --git a/configure b/configure index 1c3c80c6..04408e2a 100755 --- a/configure +++ b/configure @@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS LIBOBJS +LRSB +LIBRSB_DEFINES +LIBRSB_DIR +LIBRSB_INCDIR +LIBRSB_INCLUDES +LIBRSB_LIBS LCUDA CUDALD CUDAD @@ -5841,7 +5847,7 @@ fi # we just gave the user the chance to append values to these variables -############################################################################### + if test -n "$ac_tool_prefix"; then @@ -10985,6 +10991,27 @@ fi #FLINK="$FLINK $FCOPENACC"; fi +############################################################################### +LIBRSB_DIR="$pac_cv_librsb_dir"; +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5 +printf %s "checking for LIBRSB install dir... " >&6; } +case $LIBRSB_DIR in + /*) ;; + *) esac +pac_cv_status_file="$LIBRSB_DIR/librsb.a" +if test ! -f "$pac_cv_status_file" ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + #AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.]) +else + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5 +printf "%s\n" "$LIBRSB_DIR" >&6; } + RSBTARGETLIB=rsbd; + RSBTARGETOBJ=rsbobj; +fi + + + ############################################################################### @@ -11082,6 +11109,12 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) + + + + + + @@ -12422,6 +12455,9 @@ fi CUDA : ${HAVE_CUDA} CUDA_CC : ${pac_cv_cudacc} + OPENACC : ${pac_cv_openacc} + FCOPENACC : ${FCOPENACC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12455,6 +12491,9 @@ printf "%s\n" "$as_me: CUDA : ${HAVE_CUDA} CUDA_CC : ${pac_cv_cudacc} + OPENACC : ${pac_cv_openacc} + FCOPENACC : ${FCOPENACC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -12471,6 +12510,16 @@ printf "%s\n" "$as_me: If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}. " >&6;} +if test x"${pac_cv_openacc}" == x"yes" ; then + if test x"${FCOPENACC}" == x ; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: + WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag. + You may want to rerun configure with --with-fcopenacc= " >&5 +printf "%s\n" "$as_me: + WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag. + You may want to rerun configure with --with-fcopenacc= " >&6;} + fi +fi ############################################################################### diff --git a/configure.ac b/configure.ac index c5a19006..47b41406 100755 --- a/configure.ac +++ b/configure.ac @@ -860,7 +860,7 @@ if test x"$pac_cv_openacc" == x"yes" ; then fi ############################################################################### -PAC_ARG_WITH_LIBRSB +dnl PAC_ARG_WITH_LIBRSB() LIBRSB_DIR="$pac_cv_librsb_dir"; AC_MSG_CHECKING([for LIBRSB install dir]) case $LIBRSB_DIR in @@ -1023,6 +1023,9 @@ AC_MSG_NOTICE([ CUDA : ${HAVE_CUDA} CUDA_CC : ${pac_cv_cudacc} + OPENACC : ${pac_cv_openacc} + FCOPENACC : ${FCOPENACC} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} @@ -1042,6 +1045,13 @@ dnl Note : we should use LDLIBS sooner or later! dnl To install the program and its documentation, run 'make install' if you are root, dnl or run 'su -c "make install"' if you are not root. ]) +if test x"${pac_cv_openacc}" == x"yes" ; then + if test x"${FCOPENACC}" == x ; then + AC_MSG_NOTICE([ + WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag. + You may want to rerun configure with --with-fcopenacc= ]) + fi +fi ############################################################################### From 464baceb139735f2e28289f5ecabfa6400762ebd Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 7 Aug 2024 10:54:19 +0200 Subject: [PATCH 23/39] HLL loop nest cannot run with collapse --- openacc/impl/psb_d_oacc_hll_csmv.F90 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90 index bccd3d6d..d38fca61 100644 --- a/openacc/impl/psb_d_oacc_hll_csmv.F90 +++ b/openacc/impl/psb_d_oacc_hll_csmv.F90 @@ -64,11 +64,9 @@ contains do i = 1, m y(i) = beta * y(i) end do -#if (__GNUC__ <= 13) + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. !$acc parallel loop present(a, x, y) -#else - !$acc parallel loop collapse(2) present(a, x, y) -#endif do i = 1, nhacks do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) From 03aaa090db31ada5478a0a8bd4af07f339bab657 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 7 Aug 2024 17:04:04 +0200 Subject: [PATCH 24/39] New AX_OPENACC macro and supporting flags --- config/ax_c_openacc.m4 | 104 +++++++++++ config/ax_cxx_openacc.m4 | 104 +++++++++++ config/ax_fc_openacc.m4 | 108 +++++++++++ configure | 286 +++++++++++++++++++++++++++++- configure.ac | 32 +++- test/openacc/Makefile | 11 +- test/openacc/psb_d_oacc_pde3d.F90 | 3 +- 7 files changed, 639 insertions(+), 9 deletions(-) create mode 100644 config/ax_c_openacc.m4 create mode 100644 config/ax_cxx_openacc.m4 create mode 100644 config/ax_fc_openacc.m4 diff --git a/config/ax_c_openacc.m4 b/config/ax_c_openacc.m4 new file mode 100644 index 00000000..52f645a9 --- /dev/null +++ b/config/ax_c_openacc.m4 @@ -0,0 +1,104 @@ +# AC_OPENACC +# --------- +# Check which options need to be passed to the C compiler to support Openacc. +# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these +# options. +# The options are necessary at compile time (so the #pragmas are understood) +# and at link time (so the appropriate library is linked with). +# This macro takes care to not produce redundant options if $CC $CFLAGS already +# supports Openacc. +# +# For each candidate option, we do a compile test first, then a link test; +# if the compile test succeeds but the link test fails, that means we have +# found the correct option but it doesn't work because the libraries are +# broken. (This can happen, for instance, with SunPRO C and a bad combination +# of operating system patches.) +# +# Several of the options in our candidate list can be misinterpreted by +# compilers that don't use them to activate Openacc support; for example, +# many compilers understand "-openacc" to mean "write output to a file +# named 'penmp'" rather than "enable Openacc". We can't completely avoid +# the possibility of clobbering files named 'penmp' or 'mp' in configure's +# working directory; therefore, this macro will bomb out if any such file +# already exists when it's invoked. +AC_DEFUN([AX_C_OPENACC], +[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl +[AC_ARG_ENABLE([openacc], + [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl +[ + OPENACC_[]_AC_LANG_PREFIX[]FLAGS= + if test "$enable_openacc" != no; then + AC_LANG_PUSH([C]) + AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found' + dnl Try these flags: + dnl (on by default) '' + dnl GCC >= 4.2 -fopenacc + dnl SunPRO C -xopenacc + dnl Intel C -openacc + dnl SGI C, PGI C -mp + dnl Tru64 Compaq C -omp + dnl IBM XL C (AIX, Linux) -qsmp=omp + dnl Cray CCE -homp + dnl NEC SX -Popenacc + dnl Lahey Fortran (Linux) --openacc + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" + AC_COMPILE_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (0); return 0;} +], + [AC_LINK_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (0); return 0;} +], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])]) + _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS + + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then + ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported' + elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then + ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed' + fi + dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we + dnl started probing for Openacc support, so if they exist now, they were + dnl created by the probe loop and it's safe to delete them. + rm -f penmp mp]) + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \ + test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then + OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" + fi + AC_LANG_POP([C]) + fi +]) + +# _AC_OPENACC_SAFE_WD +# ------------------ +# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at +# configure time for files that AC_OPENACC clobbers. +AC_DEFUN([_AX_OPENACC_SAFE_WD], +[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl +[m4_if(sysval, [0], [], [m4_fatal(m4_normalize( + [AX_OPENACC clobbers files named 'mp' and 'penmp'. + To use AX_OPENACC you must not have either of these files + at the top level of your source tree.]))])]dnl +[if test -e penmp || test -e mp; then + AC_MSG_ERROR(m4_normalize( + [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'. + Aborting configure because one of these files already exists.])) +fi]) + diff --git a/config/ax_cxx_openacc.m4 b/config/ax_cxx_openacc.m4 new file mode 100644 index 00000000..5a2ad278 --- /dev/null +++ b/config/ax_cxx_openacc.m4 @@ -0,0 +1,104 @@ +# AC_OPENACC +# --------- +# Check which options need to be passed to the C compiler to support Openacc. +# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these +# options. +# The options are necessary at compile time (so the #pragmas are understood) +# and at link time (so the appropriate library is linked with). +# This macro takes care to not produce redundant options if $CC $CFLAGS already +# supports Openacc. +# +# For each candidate option, we do a compile test first, then a link test; +# if the compile test succeeds but the link test fails, that means we have +# found the correct option but it doesn't work because the libraries are +# broken. (This can happen, for instance, with SunPRO C and a bad combination +# of operating system patches.) +# +# Several of the options in our candidate list can be misinterpreted by +# compilers that don't use them to activate Openacc support; for example, +# many compilers understand "-openacc" to mean "write output to a file +# named 'penmp'" rather than "enable Openacc". We can't completely avoid +# the possibility of clobbering files named 'penmp' or 'mp' in configure's +# working directory; therefore, this macro will bomb out if any such file +# already exists when it's invoked. +AC_DEFUN([AX_CXX_OPENACC], +[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl +[AC_ARG_ENABLE([openacc], + [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl +[ + OPENACC_[]_AC_LANG_PREFIX[]FLAGS= + if test "$enable_openacc" != no; then + AC_LANG_PUSH([C++]) + AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found' + dnl Try these flags: + dnl (on by default) '' + dnl GCC >= 4.2 -fopenacc + dnl SunPRO C -xopenacc + dnl Intel C -openacc + dnl SGI C, PGI C -mp + dnl Tru64 Compaq C -omp + dnl IBM XL C (AIX, Linux) -qsmp=omp + dnl Cray CCE -homp + dnl NEC SX -Popenacc + dnl Lahey Fortran (Linux) --openacc + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" + AC_COMPILE_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (acc_get_device_type()); return 0;} +], + [AC_LINK_IFELSE([ +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (acc_get_device_type()); return 0;} +], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])]) + _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS + + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then + ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported' + elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then + ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed' + fi + dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we + dnl started probing for Openacc support, so if they exist now, they were + dnl created by the probe loop and it's safe to delete them. + rm -f penmp mp]) + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \ + test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then + OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" + fi + AC_LANG_POP([C++]) + fi +]) + +dnl _AC_OPENACC_SAFE_WD +dnl ------------------ +dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at +dnl configure time for files that AC_OPENACC clobbers. +dnl AC_DEFUN([_AX_OPENACC_SAFE_WD], +dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl +dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize( +dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'. +dnl To use AX_OPENACC you must not have either of these files +dnl at the top level of your source tree.]))])]dnl +dnl [if test -e penmp || test -e mp; then +dnl AC_MSG_ERROR(m4_normalize( +dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'. +dnl Aborting configure because one of these files already exists.])) +dnl fi]) + diff --git a/config/ax_fc_openacc.m4 b/config/ax_fc_openacc.m4 new file mode 100644 index 00000000..59775b2b --- /dev/null +++ b/config/ax_fc_openacc.m4 @@ -0,0 +1,108 @@ +# AC_OPENACC +# --------- +# Check which options need to be passed to the C compiler to support Openacc. +# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these +# options. +# The options are necessary at compile time (so the #pragmas are understood) +# and at link time (so the appropriate library is linked with). +# This macro takes care to not produce redundant options if $CC $CFLAGS already +# supports Openacc. +# +# For each candidate option, we do a compile test first, then a link test; +# if the compile test succeeds but the link test fails, that means we have +# found the correct option but it doesn't work because the libraries are +# broken. (This can happen, for instance, with SunPRO C and a bad combination +# of operating system patches.) +# +# Several of the options in our candidate list can be misinterpreted by +# compilers that don't use them to activate Openacc support; for example, +# many compilers understand "-openacc" to mean "write output to a file +# named 'penmp'" rather than "enable Openacc". We can't completely avoid +# the possibility of clobbering files named 'penmp' or 'mp' in configure's +# working directory; therefore, this macro will bomb out if any such file +# already exists when it's invoked. +AC_DEFUN([AX_FC_OPENACC], +[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl +[AC_ARG_ENABLE([openacc], + [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl +[ + OPENACC_[]_AC_LANG_PREFIX[]FLAGS= + if test "$enable_openacc" != no; then + AC_LANG_PUSH([Fortran]) + AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found' + dnl Try these flags: + dnl (on by default) '' + dnl GCC >= 4.2 -fopenacc + dnl SunPRO C -xopenacc + dnl Intel C -openacc + dnl SGI C, PGI C -mp + dnl Tru64 Compaq C -omp + dnl IBM XL C (AIX, Linux) -qsmp=omp + dnl Cray CCE -homp + dnl NEC SX -Popenacc + dnl Lahey Fortran (Linux) --openacc + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS + _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" + AC_COMPILE_IFELSE([ + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end +], + [AC_LINK_IFELSE([ + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end +], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option], + [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])]) + _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS + + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported'; then + break + fi + done + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then + ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported' + elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then + ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed' + fi + dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we + dnl started probing for Openacc support, so if they exist now, they were + dnl created by the probe loop and it's safe to delete them. + rm -f penmp mp]) + if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \ + test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then + OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" + fi + AC_LANG_POP([Fortran]) + fi +]) + +# _AC_OPENACC_SAFE_WD +# ------------------ +# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at +# configure time for files that AC_OPENACC clobbers. +AC_DEFUN([_AX_OPENACC_SAFE_WD], +[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl +[m4_if(sysval, [0], [], [m4_fatal(m4_normalize( + [AX_OPENACC clobbers files named 'mp' and 'penmp'. + To use AX_OPENACC you must not have either of these files + at the top level of your source tree.]))])]dnl +[if test -e penmp || test -e mp; then + AC_MSG_ERROR(m4_normalize( + [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'. + Aborting configure because one of these files already exists.])) +fi]) + diff --git a/configure b/configure index 04408e2a..3022ff53 100755 --- a/configure +++ b/configure @@ -852,6 +852,7 @@ with_amdlibdir with_cuda with_cudacc enable_openacc +with_extraopenacc with_ccopenacc with_cxxopenacc with_fcopenacc @@ -1506,6 +1507,7 @@ Optional Features: --enable-openmp Specify whether to enable openmp. --disable-openmp do not use OpenMP --enable-openacc Specify whether to enable openacc. + --disable-openacc do not use Openacc Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1551,6 +1553,8 @@ Optional Packages: --with-cuda=DIR Specify the CUDA install directory. --with-cudacc A comma-separated list of CCs to compile to, for example, --with-cudacc=50,60,70,75 + --with-extraopenacc additional [EXTRAOPENACC] flags to be added: will + prepend to [EXTRAOPENACC] --with-ccopenacc additional [CCOPENACC] flags to be added: will prepend to [CCOPENACC] --with-cxxopenacc additional [CXXOPENACC] flags to be added: will @@ -10925,6 +10929,284 @@ fi if test x"$pac_cv_openacc" == x"yes" ; then +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5 +printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; } + +# Check whether --with-extraopenacc was given. +if test ${with_extraopenacc+y} +then : + withval=$with_extraopenacc; +EXTRAOPENACC="${withval} ${EXTRAOPENACC}" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5 +printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; } + +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +fi + + + if test -e penmp || test -e mp; then + as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5 +fi +# Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +fi + + OPENACC_CFLAGS= + if test "$enable_openacc" != no; then + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5 +printf %s "checking for $CC option to support Openacc... " >&6; } +if test ${ax_cv_prog_c_openacc+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ax_cv_prog_c_openacc='not found' + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_CFLAGS=$CFLAGS + CFLAGS="$CFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (0); return 0;} + +_ACEOF +if ac_fn_c_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (0); return 0;} + +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ax_cv_prog_c_openacc=$ac_option +else $as_nop + ax_cv_prog_c_openacc='unsupported' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CFLAGS=$ac_save_CFLAGS + + if test "$ax_cv_prog_c_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_c_openacc" = 'not found'; then + ac_cv_prog_c_openacc='unsupported' + elif test "$ax_cv_prog_c_openacc" = ''; then + ac_cv_prog_c_openacc='none needed' + fi + rm -f penmp mp +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5 +printf "%s\n" "$ax_cv_prog_c_openacc" >&6; } + if test "$ax_cv_prog_c_openacc" != 'unsupported' && \ + test "$ax_cv_prog_c_openacc" != 'none needed'; then + OPENACC_CFLAGS="$ax_cv_prog_c_openacc" + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + CCOPENACC="$ax_cv_prog_c_openacc"; + # Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +fi + + OPENACC_CFLAGS= + if test "$enable_openacc" != no; then + ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5 +printf %s "checking for $CXX option to support Openacc... " >&6; } +if test ${ax_cv_prog_cxx_openacc+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ax_cv_prog_cxx_openacc='not found' + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_CXXFLAGS=$CXXFLAGS + CXXFLAGS="$CXXFLAGS $ac_option" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (acc_get_device_type()); return 0;} + +_ACEOF +if ac_fn_cxx_try_compile "$LINENO" +then : + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +#ifndef _OPENACC +#error "OpenACC not supported" +#endif +#include + int main (void) { acc_init (acc_get_device_type()); return 0;} + +_ACEOF +if ac_fn_cxx_try_link "$LINENO" +then : + ax_cv_prog_cxx_openacc=$ac_option +else $as_nop + ax_cv_prog_cxx_openacc='unsupported' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + CXXFLAGS=$ac_save_CXXFLAGS + + if test "$ax_cv_prog_cxx_openacc" != 'not found'; then + break + fi + done + if test "$ax_cv_prog_cxx_openacc" = 'not found'; then + ac_cv_prog_cxx_openacc='unsupported' + elif test "$ax_cv_prog_cxx_openacc" = ''; then + ac_cv_prog_cxx_openacc='none needed' + fi + rm -f penmp mp +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5 +printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; } + if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \ + test "$ax_cv_prog_cxx_openacc" != 'none needed'; then + OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc" + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + CXXOPENACC="$ax_cv_prog_cxx_openacc"; + # Check whether --enable-openacc was given. +if test ${enable_openacc+y} +then : + enableval=$enable_openacc; +fi + + OPENACC_CFLAGS= + if test "$enable_openacc" != no; then + ac_ext=${ac_fc_srcext-f} +ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' +ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_fc_compiler_gnu + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5 +printf %s "checking for $FC option to support Openacc... " >&6; } +if test ${ax_cv_prog_fc_openacc+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ax_cv_prog_fc_openacc='not found' + for ac_option in '' -fopenacc -openacc -acc; do + + ac_save_FCFLAGS=$FCFLAGS + FCFLAGS="$FCFLAGS $ac_option" + cat > conftest.$ac_ext <<_ACEOF + + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end + +_ACEOF +if ac_fn_fc_try_compile "$LINENO" +then : + cat > conftest.$ac_ext <<_ACEOF + + program main + use openacc + implicit none + integer tid, np + tid = 42 + call acc_init(0) + end + +_ACEOF +if ac_fn_fc_try_link "$LINENO" +then : + ax_cv_prog_fc_openacc=$ac_option +else $as_nop + ax_cv_prog_fc_openacc='unsupported' +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + FCFLAGS=$ac_save_FCFLAGS + + if test "$ax_cv_prog_fc_openacc" != 'unsupported'; then + break + fi + done + if test "$ax_cv_prog_fc_openacc" = 'not found'; then + ac_cv_prog_fc_openacc='unsupported' + elif test "$ax_cv_prog_fc_openacc" = ''; then + ac_cv_prog_fc_openacc='none needed' + fi + rm -f penmp mp +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5 +printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; } + if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \ + test "$ax_cv_prog_fc_openacc" != 'none needed'; then + OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc" + fi + ac_ext=c +ac_cpp='$CPP $CPPFLAGS' +ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_c_compiler_gnu + + fi + + FCOPENACC="$ax_cv_prog_fc_openacc"; + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5 printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; } @@ -10981,7 +11263,9 @@ printf "%s\n" "no" >&6; } fi - + CCOPENACC="$CCOPENACC $EXTRAOPENACC"; + CXXOPENACC="$CXXOPENACC $EXTRAOPENACC"; + FCOPENACC="$FCOPENACC $EXTRAOPENACC"; OACCD=oaccd; OACCLD=oaccld; diff --git a/configure.ac b/configure.ac index 47b41406..c702d4b1 100755 --- a/configure.ac +++ b/configure.ac @@ -845,11 +845,35 @@ if test "x$pac_cv_ipk_size" != "x4"; then fi PAC_ARG_OPENACC() +dnl AC_ARG_ENABLE([openacc], +dnl [AS_HELP_STRING([--disable-openacc], [do not use Openacc])]) if test x"$pac_cv_openacc" == x"yes" ; then - PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC) + PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC) + dnl if test false; then + AX_C_OPENACC() + CCOPENACC="$ax_cv_prog_c_openacc"; + AX_CXX_OPENACC() + CXXOPENACC="$ax_cv_prog_cxx_openacc"; + AX_FC_OPENACC() + FCOPENACC="$ax_cv_prog_fc_openacc"; + dnl AX_OPENACC() + dnl + dnl CXXOPENACC="$ax_cv_prog_cxx_openacc"; + dnl FCOPENACC="$ax_cv_prog_fc_openacc"; + dnl else +dnl AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC]) + PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC) PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC) PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC) - +dnl AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC]) +dnl CCOPENACC="$ax_cv_prog_c_openacc"; +dnl CXXOPENACC="$ax_cv_prog_cxx_openacc"; +dnl FCOPENACC="$ax_cv_prog_fc_openacc"; +dnl fi + CCOPENACC="$CCOPENACC $EXTRAOPENACC"; + CXXOPENACC="$CXXOPENACC $EXTRAOPENACC"; + FCOPENACC="$FCOPENACC $EXTRAOPENACC"; +dnl AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC]) OACCD=oaccd; OACCLD=oaccld; @@ -1025,7 +1049,9 @@ AC_MSG_NOTICE([ OPENACC : ${pac_cv_openacc} FCOPENACC : ${FCOPENACC} - + OACCD : ${OACCD} + OACCLD : ${OACCLD} + BLAS : ${BLAS_LIBS} METIS usable : ${psblas_cv_have_metis} diff --git a/test/openacc/Makefile b/test/openacc/Makefile index a2827b30..d0bd4880 100644 --- a/test/openacc/Makefile +++ b/test/openacc/Makefile @@ -17,14 +17,17 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70" CFLAGS=-O0 -march=native -SRCS=vectoacc.F90 datavect.F90 +VTC=vectoacc.o +DVT=datavect.o CSRC=timers.c OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o) -all: dir $(OBJS) - $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) - /bin/mv datavect $(EXEDIR) +all: dir psb_d_oacc_pde3d + +#$(OBJS) +# $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS) +# /bin/mv datavect $(EXEDIR) dir: @if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi diff --git a/test/openacc/psb_d_oacc_pde3d.F90 b/test/openacc/psb_d_oacc_pde3d.F90 index ac992884..7c0cb4ef 100644 --- a/test/openacc/psb_d_oacc_pde3d.F90 +++ b/test/openacc/psb_d_oacc_pde3d.F90 @@ -733,7 +733,8 @@ program psb_d_oacc_pde3d end if ! get parameters - call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms) + call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc,& + & itmax, itrace, irst, ipart, parms) ! allocate and fill in the coefficient matrix, rhs and initial guess call psb_barrier(ctxt) From ff8513b4c60985649da87f4ffeb8d214a5e18ca7 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 7 Aug 2024 17:04:29 +0200 Subject: [PATCH 25/39] Ignore .smod files in git --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 7227f784..d190e4c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.a *.o *.mod +*.smod *~ # header files generated From 2982aaee27b501e18b6a8125a263a19f03b65897 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 8 Aug 2024 11:07:28 +0200 Subject: [PATCH 26/39] Implementation in OpenACC for ELL and HLL into templates. Merge from development --- base/modules/auxil/psi_c_serial_mod.f90 | 8 +- base/modules/auxil/psi_d_serial_mod.f90 | 8 +- base/modules/auxil/psi_e_serial_mod.f90 | 8 +- base/modules/auxil/psi_i2_serial_mod.f90 | 8 +- base/modules/auxil/psi_m_serial_mod.f90 | 8 +- base/modules/auxil/psi_s_serial_mod.f90 | 8 +- base/modules/auxil/psi_z_serial_mod.f90 | 8 +- base/modules/psblas/psb_c_psblas_mod.F90 | 8 +- base/modules/psblas/psb_d_psblas_mod.F90 | 8 +- base/modules/psblas/psb_s_psblas_mod.F90 | 8 +- base/modules/psblas/psb_z_psblas_mod.F90 | 8 +- base/modules/serial/psb_c_base_vect_mod.F90 | 14 +- base/modules/serial/psb_c_vect_mod.F90 | 8 +- base/modules/serial/psb_d_base_vect_mod.F90 | 14 +- base/modules/serial/psb_d_vect_mod.F90 | 8 +- base/modules/serial/psb_s_base_vect_mod.F90 | 14 +- base/modules/serial/psb_s_vect_mod.F90 | 8 +- base/modules/serial/psb_z_base_vect_mod.F90 | 14 +- base/modules/serial/psb_z_vect_mod.F90 | 8 +- base/psblas/psb_caxpby.f90 | 8 +- base/psblas/psb_daxpby.f90 | 8 +- base/psblas/psb_saxpby.f90 | 8 +- base/psblas/psb_zaxpby.f90 | 8 +- base/serial/psi_c_serial_impl.F90 | 8 +- base/serial/psi_d_serial_impl.F90 | 8 +- base/serial/psi_e_serial_impl.F90 | 8 +- base/serial/psi_i2_serial_impl.F90 | 8 +- base/serial/psi_m_serial_impl.F90 | 8 +- base/serial/psi_s_serial_impl.F90 | 8 +- base/serial/psi_z_serial_impl.F90 | 8 +- cuda/psb_c_cuda_vect_mod.F90 | 8 +- cuda/psb_c_vectordev_mod.F90 | 8 +- cuda/psb_d_cuda_vect_mod.F90 | 8 +- cuda/psb_d_vectordev_mod.F90 | 8 +- cuda/psb_s_cuda_vect_mod.F90 | 8 +- cuda/psb_s_vectordev_mod.F90 | 8 +- cuda/psb_z_cuda_vect_mod.F90 | 8 +- cuda/psb_z_vectordev_mod.F90 | 8 +- openacc/Makefile | 27 +- openacc/impl/Makefile | 84 +- openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 | 47 + openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 78 ++ openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_ell_csmm.F90 | 86 ++ openacc/impl/psb_c_oacc_ell_csmv.F90 | 82 ++ openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 | 85 ++ openacc/impl/psb_c_oacc_ell_mold.F90 | 34 + openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 25 + openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 | 28 + openacc/impl/psb_c_oacc_ell_scal.F90 | 58 ++ openacc/impl/psb_c_oacc_ell_scals.F90 | 39 + openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 66 ++ openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 | 53 + openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 85 ++ openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_hll_csmm.F90 | 86 ++ openacc/impl/psb_c_oacc_hll_csmv.F90 | 84 ++ openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 | 86 ++ openacc/impl/psb_c_oacc_hll_mold.F90 | 34 + openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 25 + openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 24 + openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 | 29 + openacc/impl/psb_c_oacc_hll_scal.F90 | 62 ++ openacc/impl/psb_c_oacc_hll_scals.F90 | 40 + openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 67 ++ openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 2 +- openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 2 +- openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 2 +- openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 2 +- openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 | 47 + openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 78 ++ openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_ell_csmm.F90 | 86 ++ openacc/impl/psb_s_oacc_ell_csmv.F90 | 82 ++ openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 | 85 ++ openacc/impl/psb_s_oacc_ell_mold.F90 | 34 + openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 25 + openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 | 28 + openacc/impl/psb_s_oacc_ell_scal.F90 | 58 ++ openacc/impl/psb_s_oacc_ell_scals.F90 | 39 + openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 66 ++ openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 | 53 + openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 85 ++ openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_hll_csmm.F90 | 86 ++ openacc/impl/psb_s_oacc_hll_csmv.F90 | 84 ++ openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 | 86 ++ openacc/impl/psb_s_oacc_hll_mold.F90 | 34 + openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 25 + openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 24 + openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 | 29 + openacc/impl/psb_s_oacc_hll_scal.F90 | 62 ++ openacc/impl/psb_s_oacc_hll_scals.F90 | 40 + openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 67 ++ openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 | 47 + openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 78 ++ openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_ell_csmm.F90 | 86 ++ openacc/impl/psb_z_oacc_ell_csmv.F90 | 82 ++ openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 | 85 ++ openacc/impl/psb_z_oacc_ell_mold.F90 | 34 + openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 25 + openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 | 28 + openacc/impl/psb_z_oacc_ell_scal.F90 | 58 ++ openacc/impl/psb_z_oacc_ell_scals.F90 | 39 + openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 66 ++ openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 | 53 + openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 85 ++ openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_hll_csmm.F90 | 86 ++ openacc/impl/psb_z_oacc_hll_csmv.F90 | 84 ++ openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 | 86 ++ openacc/impl/psb_z_oacc_hll_mold.F90 | 34 + openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 25 + openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 24 + openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 | 29 + openacc/impl/psb_z_oacc_hll_scal.F90 | 62 ++ openacc/impl/psb_z_oacc_hll_scals.F90 | 40 + openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 67 ++ openacc/psb_c_oacc_ell_mat_mod.F90 | 341 +++++++ openacc/psb_c_oacc_hll_mat_mod.F90 | 352 +++++++ openacc/psb_c_oacc_vect_mod.F90 | 33 +- openacc/psb_d_oacc_ell_mat_mod.F90 | 682 +++++++------ openacc/psb_d_oacc_hll_mat_mod.F90 | 702 +++++++------- openacc/psb_d_oacc_vect_mod.F90 | 33 +- openacc/psb_i_oacc_vect_mod.F90 | 916 +++++++++--------- openacc/psb_l_oacc_vect_mod.F90 | 507 ++++++++++ openacc/psb_oacc_mod.F90 | 9 + openacc/psb_s_oacc_ell_mat_mod.F90 | 341 +++++++ openacc/psb_s_oacc_hll_mat_mod.F90 | 352 +++++++ openacc/psb_s_oacc_vect_mod.F90 | 33 +- openacc/psb_z_oacc_ell_mat_mod.F90 | 341 +++++++ openacc/psb_z_oacc_hll_mat_mod.F90 | 352 +++++++ openacc/psb_z_oacc_vect_mod.F90 | 33 +- 137 files changed, 8195 insertions(+), 1404 deletions(-) create mode 100644 openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_c_oacc_ell_vect_mv.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_c_oacc_hll_vect_mv.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_s_oacc_ell_vect_mv.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_s_oacc_hll_vect_mv.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_z_oacc_ell_vect_mv.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_z_oacc_hll_vect_mv.F90 create mode 100644 openacc/psb_c_oacc_ell_mat_mod.F90 create mode 100644 openacc/psb_c_oacc_hll_mat_mod.F90 create mode 100644 openacc/psb_l_oacc_vect_mod.F90 create mode 100644 openacc/psb_s_oacc_ell_mat_mod.F90 create mode 100644 openacc/psb_s_oacc_hll_mat_mod.F90 create mode 100644 openacc/psb_z_oacc_ell_mat_mod.F90 create mode 100644 openacc/psb_z_oacc_hll_mat_mod.F90 diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 3fe001c8..38b740a7 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_c_serial_mod complex(psb_spk_), intent (inout) :: z(:) complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_c_upd_xyz - end interface psi_upd_xyz + end subroutine psi_cabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index a08263df..1d65c5f6 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_d_serial_mod real(psb_dpk_), intent (inout) :: z(:) real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_d_upd_xyz - end interface psi_upd_xyz + end subroutine psi_dabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index 1f1bebd7..6f4e8c06 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_e_serial_mod integer(psb_epk_), intent (inout) :: z(:) integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_e_upd_xyz - end interface psi_upd_xyz + end subroutine psi_eabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index 770d3256..ffa14059 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_i2_serial_mod integer(psb_i2pk_), intent (inout) :: z(:) integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_i2_upd_xyz - end interface psi_upd_xyz + end subroutine psi_i2abgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 3583cccc..5661fdbf 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_m_serial_mod integer(psb_mpk_), intent (inout) :: z(:) integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_m_upd_xyz - end interface psi_upd_xyz + end subroutine psi_mabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 3e0c6d91..5cc17d58 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_s_serial_mod real(psb_spk_), intent (inout) :: z(:) real(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_s_upd_xyz - end interface psi_upd_xyz + end subroutine psi_sabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index a8ea734e..8a3f053d 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby - interface psi_upd_xyz - subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_abgdxyz + subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_z_serial_mod complex(psb_dpk_), intent (inout) :: z(:) complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_z_upd_xyz - end interface psi_upd_xyz + end subroutine psi_zabgdxyz + end interface psi_abgdxyz interface psi_xyzw subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 591dec09..7f7f937c 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_upd_xyz - subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_c_vect_type, psb_cspmat_type @@ -154,8 +154,8 @@ module psb_c_psblas_mod complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_c_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_cabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index b200bc8a..12090956 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface - interface psb_upd_xyz - subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_d_vect_type, psb_dspmat_type @@ -154,8 +154,8 @@ module psb_d_psblas_mod real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_dabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index a60da025..7a7ce783 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_upd_xyz - subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_s_vect_type, psb_sspmat_type @@ -154,8 +154,8 @@ module psb_s_psblas_mod real(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_s_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_sabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index 241df2b9..bcfe9caa 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_upd_xyz - subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_abgdxyz + subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_z_vect_type, psb_zspmat_type @@ -154,8 +154,8 @@ module psb_z_psblas_mod complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_z_upd_xyz_vect - end interface psb_upd_xyz + end subroutine psb_zabgdxyz_vect + end interface psb_abgdxyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 4dac86d6..41bab5ab 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => c_base_upd_xyz + procedure, pass(z) :: abgdxyz => c_base_abgdxyz procedure, pass(w) :: xyzw => c_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine c_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_c_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine c_base_upd_xyz + end subroutine c_base_abgdxyz subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index 1e9510f2..865f9456 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => c_vect_upd_xyz + procedure, pass(z) :: abgdxyz => c_vect_abgdxyz procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(y) :: mlt_v => c_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine c_vect_axpby_a2 - subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine c_vect_upd_xyz + end subroutine c_vect_abgdxyz subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index f07b5aed..1ad1ffa5 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => d_base_upd_xyz + procedure, pass(z) :: abgdxyz => d_base_abgdxyz procedure, pass(w) :: xyzw => d_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine d_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_d_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine d_base_upd_xyz + end subroutine d_base_abgdxyz subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index ae3062dd..55dd8230 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => d_vect_upd_xyz + procedure, pass(z) :: abgdxyz => d_vect_abgdxyz procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(y) :: mlt_v => d_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine d_vect_axpby_a2 - subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine d_vect_upd_xyz + end subroutine d_vect_abgdxyz subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 596cd634..26b82c31 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => s_base_upd_xyz + procedure, pass(z) :: abgdxyz => s_base_abgdxyz procedure, pass(w) :: xyzw => s_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine s_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_s_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine s_base_upd_xyz + end subroutine s_base_abgdxyz subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index cad4659c..a50b2a0a 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => s_vect_upd_xyz + procedure, pass(z) :: abgdxyz => s_vect_abgdxyz procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(y) :: mlt_v => s_vect_mlt_v @@ -781,7 +781,7 @@ contains end subroutine s_vect_axpby_a2 - subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -792,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine s_vect_upd_xyz + end subroutine s_vect_abgdxyz subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index 1bbdfba1..a3afc9c1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => z_base_upd_xyz + procedure, pass(z) :: abgdxyz => z_base_abgdxyz procedure, pass(w) :: xyzw => z_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine z_base_axpby_a2 ! - ! UPD_XYZ is invoked via Z, hence the structure below. + ! ABGDXYZ is invoked via Z, hence the structure below. ! ! - !> Function base_upd_xyz + !> Function base_abgdxyz !! \memberof psb_z_base_vect_type - !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine z_base_upd_xyz + end subroutine z_base_abgdxyz subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 48f2e947..21e0c546 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,7 +102,7 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: upd_xyz => z_vect_upd_xyz + procedure, pass(z) :: abgdxyz => z_vect_abgdxyz procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(y) :: mlt_v => z_vect_mlt_v @@ -774,7 +774,7 @@ contains end subroutine z_vect_axpby_a2 - subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -785,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine z_vect_upd_xyz + end subroutine z_vect_abgdxyz subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 5d80ef00..7c22bb06 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) end subroutine psb_caddconst_vect -subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_c_upd_xyz_vect +end subroutine psb_cabgdxyz_vect diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 38ebe465..1de77647 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) end subroutine psb_daddconst_vect -subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_d_upd_xyz_vect +end subroutine psb_dabgdxyz_vect diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 0055fdbe..1b1f24e6 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) end subroutine psb_saddconst_vect -subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_s_upd_xyz_vect +end subroutine psb_sabgdxyz_vect diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index e93488e3..0f37a1f4 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) end subroutine psb_zaddconst_vect -subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect + use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_z_upd_xyz_vect +end subroutine psb_zabgdxyz_vect diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index e3f1d9a3..e230a1e0 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine caxpbyv2 -subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='c_upd_xyz' + name='cabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_c_upd_xyz +end subroutine psi_cabgdxyz subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='c_xyzw' + name='cabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index d6a9a31d..bf1b2917 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine daxpbyv2 -subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='d_upd_xyz' + name='dabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_d_upd_xyz +end subroutine psi_dabgdxyz subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='d_xyzw' + name='dabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 37b11a94..911ab4ec 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine eaxpbyv2 -subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='e_upd_xyz' + name='eabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_e_upd_xyz +end subroutine psi_eabgdxyz subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='e_xyzw' + name='eabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index c20cd60b..fb42dfcd 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine i2axpbyv2 -subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2_upd_xyz' + name='i2abgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_i2_upd_xyz +end subroutine psi_i2abgdxyz subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2_xyzw' + name='i2abgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 55913a16..346fd897 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine maxpbyv2 -subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='m_upd_xyz' + name='mabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_m_upd_xyz +end subroutine psi_mabgdxyz subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='m_xyzw' + name='mabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index c3846c8e..52f86bcd 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine saxpbyv2 -subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='s_upd_xyz' + name='sabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_s_upd_xyz +end subroutine psi_sabgdxyz subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='s_xyzw' + name='sabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 763eae22..7e680273 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine zaxpbyv2 -subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='z_upd_xyz' + name='zabgdxyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_z_upd_xyz +end subroutine psi_zabgdxyz subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='z_xyzw' + name='zabgdxyz' info = psb_success_ if (m.lt.0) then diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 45fafe0a..2c2a4f61 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a - procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a - procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine d_cuda_axpby_v - subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a - procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine s_cuda_axpby_v - subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a - procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz + procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine z_cuda_axpby_v - subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_ell_inner_vect_sv +end submodule psb_c_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_c_oacc_ell_mold.F90 b/openacc/impl/psb_c_oacc_ell_mold.F90 new file mode 100644 index 00000000..88331d1d --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mold(a, b, info) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_ell_mold +end submodule psb_c_oacc_ell_mold_impl diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..7e703aa2 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_ell_mv_from_coo +end submodule psb_c_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..7d1f790d --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_c_oacc_ell_mv_from_fmt +end submodule psb_c_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..9f21c5df --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_reallocate_nz +end submodule psb_c_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_ell_scal.F90 b/openacc/impl/psb_c_oacc_ell_scal.F90 new file mode 100644 index 00000000..b3ea90fb --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_scal +end submodule psb_c_oacc_ell_scal_impl diff --git a/openacc/impl/psb_c_oacc_ell_scals.F90 b/openacc/impl/psb_c_oacc_ell_scals.F90 new file mode 100644 index 00000000..f067f253 --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_scals(d, a, info) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_ell_scals +end submodule psb_c_oacc_ell_scals_impl diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..e65d00ba --- /dev/null +++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_c_oacc_ell_vect_mv +end submodule psb_c_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..0840d0d6 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_c_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = czero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_allocate_mnnz +end submodule psb_c_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..4c12cdf8 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + complex(psb_spk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = czero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_hll_cp_from_coo +end submodule psb_c_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..af6cc1d5 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_c_oacc_hll_cp_from_fmt +end submodule psb_c_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_csmm.F90 b/openacc/impl/psb_c_oacc_hll_csmm.F90 new file mode 100644 index 00000000..6b0fc637 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_csmm +end submodule psb_c_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_c_oacc_hll_csmv.F90 b/openacc/impl/psb_c_oacc_hll_csmv.F90 new file mode 100644 index 00000000..f32e37b7 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_csmv.F90 @@ -0,0 +1,84 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. + !$acc parallel loop present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_csmv +end submodule psb_c_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..a8d486b2 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'c_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_c_vect_oacc) + select type(yy => y) + type is (psb_c_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_c_oacc_hll_inner_vect_sv +end submodule psb_c_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_c_oacc_hll_mold.F90 b/openacc/impl/psb_c_oacc_hll_mold.F90 new file mode 100644 index 00000000..f480f3ab --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mold(a, b, info) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_c_oacc_hll_mold +end submodule psb_c_oacc_hll_mold_impl diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..dec52d40 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_c_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_c_oacc_hll_mv_from_coo +end submodule psb_c_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..f2a064cb --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_c_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_c_oacc_hll_mv_from_fmt +end submodule psb_c_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..52983d4e --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='c_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_c_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_reallocate_nz +end submodule psb_c_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_c_oacc_hll_scal.F90 b/openacc/impl/psb_c_oacc_hll_scal.F90 new file mode 100644 index 00000000..527a0ec1 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_scal +end submodule psb_c_oacc_hll_scal_impl diff --git a/openacc/impl/psb_c_oacc_hll_scals.F90 b/openacc/impl/psb_c_oacc_hll_scals.F90 new file mode 100644 index 00000000..00f24721 --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_scals(d, a, info) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_c_oacc_hll_scals +end submodule psb_c_oacc_hll_scals_impl diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..3b74d11a --- /dev/null +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + complex(psb_spk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_c_oacc_hll_vect_mv +end submodule psb_c_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 index 4923e12c..b46c5454 100644 --- a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 @@ -29,7 +29,7 @@ contains allocate(a%idiag(m)) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 index 4e8402e7..c13d1edd 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -26,7 +26,7 @@ contains allocate(a%irn(a%get_nrows())) allocate(a%idiag(a%get_nrows())) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 index 909ee90b..47a6933b 100644 --- a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 @@ -34,7 +34,7 @@ contains allocate(a%hkoffs(nhacks)) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 index 4a258c74..18bd768b 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -27,7 +27,7 @@ contains allocate(a%idiag(a%get_nrows())) allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) end if - a%val = 0.0_psb_dpk_ + a%val = dzero a%ja = -1 a%irn = 0 a%idiag = 0 diff --git a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..38c19b78 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,47 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_s_ell_sparse_mat%allocate(m, n, nz_) + + if (.not.allocated(a%val)) then + allocate(a%val(m, nz_)) + allocate(a%ja(m, nz_)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + end if + + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_allocate_mnnz +end submodule psb_s_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..9aaaff73 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,78 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_spk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 1 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(a%get_nrows(), nz_per_row)) + allocate(a%ja(a%get_nrows(), nz_per_row)) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + end if + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row, row_counts(row) + 1) = value + a%ja(row, row_counts(row) + 1) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(i, j) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_ell_cp_from_coo +end submodule psb_s_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..d4c1a233 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_s_oacc_ell_cp_from_fmt +end submodule psb_s_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_ell_csmm.F90 b/openacc/impl/psb_s_oacc_ell_csmm.F90 new file mode 100644 index 00000000..63219384 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmm_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = 1, nzt + y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_csmm +end submodule psb_s_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_s_oacc_ell_csmv.F90 b/openacc/impl/psb_s_oacc_ell_csmv.F90 new file mode 100644 index 00000000..d4aaa9d4 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_csmv.F90 @@ -0,0 +1,82 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nzt = a%nzt + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, m + do j = 1, nzt + y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_csmv +end submodule psb_s_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..ba42af12 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_ell_inner_vect_sv +end submodule psb_s_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_s_oacc_ell_mold.F90 b/openacc/impl/psb_s_oacc_ell_mold.F90 new file mode 100644 index 00000000..92f18f25 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mold(a, b, info) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_ell_mold +end submodule psb_s_oacc_ell_mold_impl diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..d6bbec13 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_ell_mv_from_coo +end submodule psb_s_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..ebb82901 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_s_oacc_ell_mv_from_fmt +end submodule psb_s_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..373c2b67 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_reallocate_nz +end submodule psb_s_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_ell_scal.F90 b/openacc/impl/psb_s_oacc_ell_scal.F90 new file mode 100644 index 00000000..180d8f9a --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_scal +end submodule psb_s_oacc_ell_scal_impl diff --git a/openacc/impl/psb_s_oacc_ell_scals.F90 b/openacc/impl/psb_s_oacc_ell_scals.F90 new file mode 100644 index 00000000..c1c305af --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_scals(d, a, info) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_ell_scals +end submodule psb_s_oacc_ell_scals_impl diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..f48ba041 --- /dev/null +++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_spk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_s_oacc_ell_vect_mv +end submodule psb_s_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..c67ea621 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_s_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_allocate_mnnz +end submodule psb_s_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..34a0b5d5 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_spk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = szero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_hll_cp_from_coo +end submodule psb_s_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..4d023f8b --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_s_oacc_hll_cp_from_fmt +end submodule psb_s_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_csmm.F90 b/openacc/impl/psb_s_oacc_hll_csmm.F90 new file mode 100644 index 00000000..803071eb --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_csmm +end submodule psb_s_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_s_oacc_hll_csmv.F90 b/openacc/impl/psb_s_oacc_hll_csmv.F90 new file mode 100644 index 00000000..b3c0cae8 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_csmv.F90 @@ -0,0 +1,84 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. + !$acc parallel loop present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_csmv +end submodule psb_s_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..900b8982 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_spk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 's_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_s_vect_oacc) + select type(yy => y) + type is (psb_s_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_s_oacc_hll_inner_vect_sv +end submodule psb_s_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_s_oacc_hll_mold.F90 b/openacc/impl/psb_s_oacc_hll_mold.F90 new file mode 100644 index 00000000..1e43b65b --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mold(a, b, info) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_s_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_s_oacc_hll_mold +end submodule psb_s_oacc_hll_mold_impl diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..08b553b7 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_s_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_s_oacc_hll_mv_from_coo +end submodule psb_s_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..d5867289 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_s_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_s_oacc_hll_mv_from_fmt +end submodule psb_s_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..7768d1ed --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='s_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_s_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_reallocate_nz +end submodule psb_s_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_s_oacc_hll_scal.F90 b/openacc/impl/psb_s_oacc_hll_scal.F90 new file mode 100644 index 00000000..ae36465e --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_scal +end submodule psb_s_oacc_hll_scal_impl diff --git a/openacc/impl/psb_s_oacc_hll_scals.F90 b/openacc/impl/psb_s_oacc_hll_scals.F90 new file mode 100644 index 00000000..360ea84d --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_scals(d, a, info) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_s_oacc_hll_scals +end submodule psb_s_oacc_hll_scals_impl diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..9d9e9197 --- /dev/null +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + real(psb_spk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_s_oacc_hll_vect_mv +end submodule psb_s_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..48a5e202 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,47 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_z_ell_sparse_mat%allocate(m, n, nz_) + + if (.not.allocated(a%val)) then + allocate(a%val(m, nz_)) + allocate(a%ja(m, nz_)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + end if + + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_allocate_mnnz +end submodule psb_z_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..e4d3b731 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,78 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + complex(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 1 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(a%get_nrows(), nz_per_row)) + allocate(a%ja(a%get_nrows(), nz_per_row)) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + end if + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row, row_counts(row) + 1) = value + a%ja(row, row_counts(row) + 1) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(i, j) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_ell_cp_from_coo +end submodule psb_z_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..98404ae2 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_z_oacc_ell_cp_from_fmt +end submodule psb_z_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_ell_csmm.F90 b/openacc/impl/psb_z_oacc_ell_csmm.F90 new file mode 100644 index 00000000..406ca8c5 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmm_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = 1, nzt + y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_csmm +end submodule psb_z_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_z_oacc_ell_csmv.F90 b/openacc/impl/psb_z_oacc_ell_csmv.F90 new file mode 100644 index 00000000..502dd4f1 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_csmv.F90 @@ -0,0 +1,82 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nzt = a%nzt + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, m + do j = 1, nzt + y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_csmv +end submodule psb_z_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..f445a6b4 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_oacc) + select type(yy => y) + type is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_z_oacc_ell_inner_vect_sv +end submodule psb_z_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_z_oacc_ell_mold.F90 b/openacc/impl/psb_z_oacc_ell_mold.F90 new file mode 100644 index 00000000..fcc222de --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mold(a, b, info) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_ell_mold +end submodule psb_z_oacc_ell_mold_impl diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..26388e5e --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_ell_mv_from_coo +end submodule psb_z_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..e0f75828 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_z_oacc_ell_mv_from_fmt +end submodule psb_z_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..8fd3ad77 --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_reallocate_nz +end submodule psb_z_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_ell_scal.F90 b/openacc/impl/psb_z_oacc_ell_scal.F90 new file mode 100644 index 00000000..e3d25ccb --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_scal +end submodule psb_z_oacc_ell_scal_impl diff --git a/openacc/impl/psb_z_oacc_ell_scals.F90 b/openacc/impl/psb_z_oacc_ell_scals.F90 new file mode 100644 index 00000000..c382627a --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_scals(d, a, info) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_ell_scals +end submodule psb_z_oacc_ell_scals_impl diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..ecb61adf --- /dev/null +++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + complex(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_ell_vect_mv +end submodule psb_z_oacc_ell_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..c398d259 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_z_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_allocate_mnnz +end submodule psb_z_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..62be2252 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + complex(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = zzero + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + !goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_hll_cp_from_coo +end submodule psb_z_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..f267e1c6 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_z_oacc_hll_cp_from_fmt +end submodule psb_z_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_csmm.F90 b/openacc/impl/psb_z_oacc_hll_csmm.F90 new file mode 100644 index 00000000..3cfe5b32 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_csmm +end submodule psb_z_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_z_oacc_hll_csmv.F90 b/openacc/impl/psb_z_oacc_hll_csmv.F90 new file mode 100644 index 00000000..923bc061 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_csmv.F90 @@ -0,0 +1,84 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + ! This loop nest cannot be run with collapse, since + ! the inner loop extent varies. + !$acc parallel loop present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_csmv +end submodule psb_z_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..1d068542 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + complex(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'z_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_z_vect_oacc) + select type(yy => y) + type is (psb_z_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_z_oacc_hll_inner_vect_sv +end submodule psb_z_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_z_oacc_hll_mold.F90 b/openacc/impl/psb_z_oacc_hll_mold.F90 new file mode 100644 index 00000000..f614ad89 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mold(a, b, info) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_z_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_z_oacc_hll_mold +end submodule psb_z_oacc_hll_mold_impl diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..2ff574d3 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_z_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_z_oacc_hll_mv_from_coo +end submodule psb_z_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..5fa00e38 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_z_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_z_oacc_hll_mv_from_fmt +end submodule psb_z_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..5b49efe5 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='z_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_z_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_reallocate_nz +end submodule psb_z_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_z_oacc_hll_scal.F90 b/openacc/impl/psb_z_oacc_hll_scal.F90 new file mode 100644 index 00000000..a2f9aee7 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_scal.F90 @@ -0,0 +1,62 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + ! $ a parallel loop collapse(2) present(a, d) + !$acc parallel loop present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_scal +end submodule psb_z_oacc_hll_scal_impl diff --git a/openacc/impl/psb_z_oacc_hll_scals.F90 b/openacc/impl/psb_z_oacc_hll_scals.F90 new file mode 100644 index 00000000..888115cd --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_scals.F90 @@ -0,0 +1,40 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_scals(d, a, info) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + ! $ a parallel loop collapse(2) present(a) + !$acc parallel loop present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_z_oacc_hll_scals +end submodule psb_z_oacc_hll_scals_impl diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..89d970c0 --- /dev/null +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + complex(psb_dpk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_z_oacc_hll_vect_mv +end submodule psb_z_oacc_hll_vect_mv_impl diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..102d41c5 --- /dev/null +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -0,0 +1,341 @@ +module psb_c_oacc_ell_mat_mod + use iso_c_binding + use psb_c_mat_mod + use psb_c_ell_mat_mod + use psb_c_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_ell_sparse_mat) :: psb_c_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => c_oacc_ell_sizeof + procedure, pass(a) :: is_host => c_oacc_ell_is_host + procedure, pass(a) :: is_sync => c_oacc_ell_is_sync + procedure, pass(a) :: is_dev => c_oacc_ell_is_dev + procedure, pass(a) :: set_host => c_oacc_ell_set_host + procedure, pass(a) :: set_sync => c_oacc_ell_set_sync + procedure, pass(a) :: set_dev => c_oacc_ell_set_dev + procedure, pass(a) :: sync_space => c_oacc_ell_sync_space + procedure, pass(a) :: sync => c_oacc_ell_sync + procedure, pass(a) :: free => c_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv + procedure, pass(a) :: scals => psb_c_oacc_ell_scals + procedure, pass(a) :: scalv => psb_c_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_ell_mold + + end type psb_c_oacc_ell_sparse_mat + + interface + module subroutine psb_c_oacc_ell_mold(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mold + end interface + + interface + module subroutine psb_c_oacc_ell_cp_from_fmt(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_ell_mv_from_coo(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_ell_mv_from_fmt(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_csmm + end interface + + interface + module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_ell_csmv + end interface + + interface + module subroutine psb_c_oacc_ell_scals(d, a, info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_scals + end interface + + interface + module subroutine psb_c_oacc_ell_scal(d,a,info,side) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_ell_scal + end interface + + interface + module subroutine psb_c_oacc_ell_reallocate_nz(nz,a) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_ell_cp_from_coo(a,b,info) + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_ell_cp_from_coo + end interface + +contains + + subroutine c_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_c_ell_sparse_mat%free() + + return + end subroutine c_oacc_ell_free + + + function c_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function c_oacc_ell_sizeof + + subroutine c_oacc_ell_sync_space(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call c_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine c_oacc_create_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine c_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine c_oacc_ell_sync_space + + + + function c_oacc_ell_is_host(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_oacc_ell_is_host + + function c_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_oacc_ell_is_sync + + function c_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_oacc_ell_is_dev + + subroutine c_oacc_ell_set_host(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_oacc_ell_set_host + + subroutine c_oacc_ell_set_sync(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_oacc_ell_set_sync + + subroutine c_oacc_ell_set_dev(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_oacc_ell_set_dev + + function c_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function c_oacc_ell_get_fmt + + subroutine c_oacc_ell_sync(a) + implicit none + class(psb_c_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_c_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call c_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call c_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine c_oacc_ell_sync + + subroutine c_oacc_ell_to_host(v) + implicit none + complex(psb_spk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine c_oacc_ell_to_host + + subroutine c_oacc_ell_to_host_scalar(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine c_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_c_oacc_ell_mat_mod diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..faad0a1b --- /dev/null +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_c_oacc_hll_mat_mod + use iso_c_binding + use psb_c_mat_mod + use psb_c_hll_mat_mod + use psb_c_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_c_hll_sparse_mat) :: psb_c_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => c_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => c_oacc_hll_sizeof + procedure, pass(a) :: is_host => c_oacc_hll_is_host + procedure, pass(a) :: is_sync => c_oacc_hll_is_sync + procedure, pass(a) :: is_dev => c_oacc_hll_is_dev + procedure, pass(a) :: set_host => c_oacc_hll_set_host + procedure, pass(a) :: set_sync => c_oacc_hll_set_sync + procedure, pass(a) :: set_dev => c_oacc_hll_set_dev + procedure, pass(a) :: sync_space => c_oacc_hll_sync_space + procedure, pass(a) :: sync => c_oacc_hll_sync + procedure, pass(a) :: free => c_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv + procedure, pass(a) :: scals => psb_c_oacc_hll_scals + procedure, pass(a) :: scalv => psb_c_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_hll_mold + + end type psb_c_oacc_hll_sparse_mat + + interface + module subroutine psb_c_oacc_hll_mold(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + class(psb_c_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mold + end interface + + interface + module subroutine psb_c_oacc_hll_cp_from_fmt(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_c_oacc_hll_mv_from_coo(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_c_oacc_hll_mv_from_fmt(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta + class(psb_c_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_csmm + end interface + + interface + module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_spk_), intent(in) :: alpha, beta, x(:) + complex(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_c_oacc_hll_csmv + end interface + + interface + module subroutine psb_c_oacc_hll_scals(d, a, info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_scals + end interface + + interface + module subroutine psb_c_oacc_hll_scal(d,a,info,side) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_c_oacc_hll_scal + end interface + + interface + module subroutine psb_c_oacc_hll_reallocate_nz(nz,a) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_c_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_c_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_c_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_c_oacc_hll_cp_from_coo(a,b,info) + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_c_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_c_oacc_hll_cp_from_coo + end interface + +contains + + subroutine c_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_c_hll_sparse_mat%free() + + return + end subroutine c_oacc_hll_free + + function c_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function c_oacc_hll_sizeof + + + + function c_oacc_hll_is_host(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function c_oacc_hll_is_host + + function c_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function c_oacc_hll_is_sync + + function c_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function c_oacc_hll_is_dev + + subroutine c_oacc_hll_set_host(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine c_oacc_hll_set_host + + subroutine c_oacc_hll_set_sync(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine c_oacc_hll_set_sync + + subroutine c_oacc_hll_set_dev(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine c_oacc_hll_set_dev + + function c_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function c_oacc_hll_get_fmt + + subroutine c_oacc_hll_sync_space(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call c_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine c_oacc_create_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine c_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine c_oacc_hll_sync_space + + + subroutine c_oacc_hll_sync(a) + implicit none + class(psb_c_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_c_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call c_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call c_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine c_oacc_hll_sync + + subroutine c_oacc_hll_to_host(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine c_oacc_hll_to_host + + subroutine c_oacc_hll_to_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine c_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_c_oacc_hll_mat_mod diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 6f9545ea..9225f159 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_c_oacc_vect_mod procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf procedure, pass(x) :: get_size => c_oacc_get_size + procedure, pass(x) :: dot_v => c_oacc_vect_dot procedure, pass(x) :: dot_a => c_oacc_dot_a procedure, pass(y) :: axpby_v => c_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_c_oacc_vect_mod end subroutine c_oacc_mlt_v end interface - interface subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_c_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine c_oacc_mlt_v_2 end interface - + contains subroutine c_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'c_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine c_oacc_sctb_x - - subroutine c_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine c_oacc_sctb - subroutine c_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'c_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'c_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_c_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_c_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine c_oacc_ins_v - - subroutine c_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine c_oacc_ins_a - - subroutine c_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine c_oacc_bld_x - subroutine c_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine c_oacc_asb_m - - subroutine c_oacc_set_scal(x, val, first, last) class(psb_c_vect_oacc), intent(inout) :: x complex(psb_spk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine c_oacc_set_scal - - subroutine c_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function c_oacc_get_fmt + function c_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function c_oacc_vect_dot - - - function c_oacc_dot_a(n, x, y) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine c_oacc_vect_all - subroutine c_oacc_vect_free(x, info) implicit none class(psb_c_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 8c5946ba..042c0ff3 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -1,343 +1,341 @@ module psb_d_oacc_ell_mat_mod - use iso_c_binding - use psb_d_mat_mod - use psb_d_ell_mat_mod - use psb_d_oacc_vect_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat - integer(psb_ipk_) :: devstate = is_host - contains - procedure, nopass :: get_fmt => d_oacc_ell_get_fmt - procedure, pass(a) :: sizeof => d_oacc_ell_sizeof - procedure, pass(a) :: is_host => d_oacc_ell_is_host - procedure, pass(a) :: is_sync => d_oacc_ell_is_sync - procedure, pass(a) :: is_dev => d_oacc_ell_is_dev - procedure, pass(a) :: set_host => d_oacc_ell_set_host - procedure, pass(a) :: set_sync => d_oacc_ell_set_sync - procedure, pass(a) :: set_dev => d_oacc_ell_set_dev - procedure, pass(a) :: sync_space => d_oacc_ell_sync_space - procedure, pass(a) :: sync => d_oacc_ell_sync - procedure, pass(a) :: free => d_oacc_ell_free - procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv - procedure, pass(a) :: scals => psb_d_oacc_ell_scals - procedure, pass(a) :: scalv => psb_d_oacc_ell_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt - procedure, pass(a) :: mold => psb_d_oacc_ell_mold - - end type psb_d_oacc_ell_sparse_mat - - interface - module subroutine psb_d_oacc_ell_mold(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_mold - end interface - - interface - module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_cp_from_fmt - end interface - - interface - module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_mv_from_coo - end interface - - interface - module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_mv_from_fmt - end interface - - interface - module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_vect_mv - end interface - - interface - module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x,y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_inner_vect_sv - end interface - - interface - module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_csmm - end interface - - interface - module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_csmv - end interface - - interface - module subroutine psb_d_oacc_ell_scals(d, a, info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_scals - end interface - - interface - module subroutine psb_d_oacc_ell_scal(d,a,info,side) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: side - end subroutine psb_d_oacc_ell_scal - end interface - - interface - module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: nz - end subroutine psb_d_oacc_ell_reallocate_nz - end interface - - interface - module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: m,n - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_oacc_ell_allocate_mnnz - end interface - - interface - module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_ell_cp_from_coo - end interface - - contains - - subroutine d_oacc_ell_free(a) - use psb_base_mod - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: info - - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - - call a%psb_d_ell_sparse_mat%free() - - return - end subroutine d_oacc_ell_free - - - function d_oacc_ell_sizeof(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - - if (a%is_dev()) call a%sync() - - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%ja) - res = res + psb_sizeof_ip * size(a%irn) - res = res + psb_sizeof_ip * size(a%idiag) - - end function d_oacc_ell_sizeof - - subroutine d_oacc_ell_sync_space(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - end subroutine d_oacc_ell_sync_space - - - - function d_oacc_ell_is_host(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_oacc_ell_is_host - - function d_oacc_ell_is_sync(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_oacc_ell_is_sync - - function d_oacc_ell_is_dev(a) result(res) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_oacc_ell_is_dev - - subroutine d_oacc_ell_set_host(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_oacc_ell_set_host - - subroutine d_oacc_ell_set_sync(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_oacc_ell_set_sync - - subroutine d_oacc_ell_set_dev(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_oacc_ell_set_dev - - function d_oacc_ell_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'ELL_oacc' - end function d_oacc_ell_get_fmt - - subroutine d_oacc_ell_sync(a) - implicit none - class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a - class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (a%is_dev()) then - call d_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) - else if (a%is_host()) then - call d_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) - end if - call tmpa%set_sync() - end subroutine d_oacc_ell_sync - - subroutine d_oacc_ell_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host - - subroutine d_oacc_ell_to_host_scalar(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar - - -end module psb_d_oacc_ell_mat_mod \ No newline at end of file + use iso_c_binding + use psb_d_mat_mod + use psb_d_ell_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => d_oacc_ell_sizeof + procedure, pass(a) :: is_host => d_oacc_ell_is_host + procedure, pass(a) :: is_sync => d_oacc_ell_is_sync + procedure, pass(a) :: is_dev => d_oacc_ell_is_dev + procedure, pass(a) :: set_host => d_oacc_ell_set_host + procedure, pass(a) :: set_sync => d_oacc_ell_set_sync + procedure, pass(a) :: set_dev => d_oacc_ell_set_dev + procedure, pass(a) :: sync_space => d_oacc_ell_sync_space + procedure, pass(a) :: sync => d_oacc_ell_sync + procedure, pass(a) :: free => d_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv + procedure, pass(a) :: scals => psb_d_oacc_ell_scals + procedure, pass(a) :: scalv => psb_d_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_ell_mold + + end type psb_d_oacc_ell_sparse_mat + + interface + module subroutine psb_d_oacc_ell_mold(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mold + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmm + end interface + + interface + module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmv + end interface + + interface + module subroutine psb_d_oacc_ell_scals(d, a, info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_scals + end interface + + interface + module subroutine psb_d_oacc_ell_scal(d,a,info,side) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_ell_scal + end interface + + interface + module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_coo + end interface + +contains + + subroutine d_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_d_ell_sparse_mat%free() + + return + end subroutine d_oacc_ell_free + + + function d_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function d_oacc_ell_sizeof + + subroutine d_oacc_ell_sync_space(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine d_oacc_ell_sync_space + + + + function d_oacc_ell_is_host(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_ell_is_host + + function d_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_ell_is_sync + + function d_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_ell_is_dev + + subroutine d_oacc_ell_set_host(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_ell_set_host + + subroutine d_oacc_ell_set_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_ell_set_sync + + subroutine d_oacc_ell_set_dev(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_ell_set_dev + + function d_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function d_oacc_ell_get_fmt + + subroutine d_oacc_ell_sync(a) + implicit none + class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call d_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine d_oacc_ell_sync + + subroutine d_oacc_ell_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine d_oacc_ell_to_host + + subroutine d_oacc_ell_to_host_scalar(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_d_oacc_ell_mat_mod diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index 530af94a..b1c36a65 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -1,352 +1,352 @@ module psb_d_oacc_hll_mat_mod - use iso_c_binding - use psb_d_mat_mod - use psb_d_hll_mat_mod - use psb_d_oacc_vect_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat - integer(psb_ipk_) :: devstate = is_host - contains - procedure, nopass :: get_fmt => d_oacc_hll_get_fmt - procedure, pass(a) :: sizeof => d_oacc_hll_sizeof - procedure, pass(a) :: is_host => d_oacc_hll_is_host - procedure, pass(a) :: is_sync => d_oacc_hll_is_sync - procedure, pass(a) :: is_dev => d_oacc_hll_is_dev - procedure, pass(a) :: set_host => d_oacc_hll_set_host - procedure, pass(a) :: set_sync => d_oacc_hll_set_sync - procedure, pass(a) :: set_dev => d_oacc_hll_set_dev - procedure, pass(a) :: sync_space => d_oacc_hll_sync_space - procedure, pass(a) :: sync => d_oacc_hll_sync - procedure, pass(a) :: free => d_oacc_hll_free - procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv - procedure, pass(a) :: scals => psb_d_oacc_hll_scals - procedure, pass(a) :: scalv => psb_d_oacc_hll_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt - procedure, pass(a) :: mold => psb_d_oacc_hll_mold - - end type psb_d_oacc_hll_sparse_mat - - interface - module subroutine psb_d_oacc_hll_mold(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_mold - end interface - - interface - module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_cp_from_fmt - end interface - - interface - module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_mv_from_coo - end interface - - interface - module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_mv_from_fmt - end interface - - interface - module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_vect_mv - end interface - - interface - module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x,y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_inner_vect_sv - end interface - - interface - module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_csmm - end interface - - interface - module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_csmv - end interface - - interface - module subroutine psb_d_oacc_hll_scals(d, a, info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_scals - end interface - - interface - module subroutine psb_d_oacc_hll_scal(d,a,info,side) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: side - end subroutine psb_d_oacc_hll_scal - end interface - - interface - module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: nz - end subroutine psb_d_oacc_hll_reallocate_nz - end interface - - interface - module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: m,n - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_oacc_hll_allocate_mnnz - end interface - - interface - module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_hll_cp_from_coo - end interface - - contains - - subroutine d_oacc_hll_free(a) - use psb_base_mod - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: info - - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if - - call a%psb_d_hll_sparse_mat%free() - - return - end subroutine d_oacc_hll_free - - function d_oacc_hll_sizeof(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - - if (a%is_dev()) call a%sync() - - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%ja) - res = res + psb_sizeof_ip * size(a%irn) - res = res + psb_sizeof_ip * size(a%idiag) - res = res + psb_sizeof_ip * size(a%hkoffs) - end function d_oacc_hll_sizeof - - - - function d_oacc_hll_is_host(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_oacc_hll_is_host - - function d_oacc_hll_is_sync(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_oacc_hll_is_sync - - function d_oacc_hll_is_dev(a) result(res) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_oacc_hll_is_dev - - subroutine d_oacc_hll_set_host(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_oacc_hll_set_host - - subroutine d_oacc_hll_set_sync(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_oacc_hll_set_sync - - subroutine d_oacc_hll_set_dev(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_oacc_hll_set_dev - - function d_oacc_hll_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'HLL_oacc' - end function d_oacc_hll_get_fmt - - subroutine d_oacc_hll_sync_space(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - - end subroutine d_oacc_hll_sync_space - - - subroutine d_oacc_hll_sync(a) - implicit none - class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a - class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (a%is_dev()) then - call d_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) - else if (a%is_host()) then - call d_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) - end if - call tmpa%set_sync() - end subroutine d_oacc_hll_sync - - subroutine d_oacc_hll_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_hll_to_host - - subroutine d_oacc_hll_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - -end module psb_d_oacc_hll_mat_mod \ No newline at end of file + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => d_oacc_hll_sizeof + procedure, pass(a) :: is_host => d_oacc_hll_is_host + procedure, pass(a) :: is_sync => d_oacc_hll_is_sync + procedure, pass(a) :: is_dev => d_oacc_hll_is_dev + procedure, pass(a) :: set_host => d_oacc_hll_set_host + procedure, pass(a) :: set_sync => d_oacc_hll_set_sync + procedure, pass(a) :: set_dev => d_oacc_hll_set_dev + procedure, pass(a) :: sync_space => d_oacc_hll_sync_space + procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free => d_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv + procedure, pass(a) :: scals => psb_d_oacc_hll_scals + procedure, pass(a) :: scalv => psb_d_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_hll_mold + + end type psb_d_oacc_hll_sparse_mat + + interface + module subroutine psb_d_oacc_hll_mold(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mold + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmm + end interface + + interface + module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmv + end interface + + interface + module subroutine psb_d_oacc_hll_scals(d, a, info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_scals + end interface + + interface + module subroutine psb_d_oacc_hll_scal(d,a,info,side) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_hll_scal + end interface + + interface + module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_coo + end interface + +contains + + subroutine d_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_d_hll_sparse_mat%free() + + return + end subroutine d_oacc_hll_free + + function d_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function d_oacc_hll_sizeof + + + + function d_oacc_hll_is_host(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_hll_is_host + + function d_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_hll_is_sync + + function d_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_hll_is_dev + + subroutine d_oacc_hll_set_host(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_hll_set_host + + subroutine d_oacc_hll_set_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_hll_set_sync + + subroutine d_oacc_hll_set_dev(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_hll_set_dev + + function d_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function d_oacc_hll_get_fmt + + subroutine d_oacc_hll_sync_space(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine d_oacc_hll_sync_space + + + subroutine d_oacc_hll_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call d_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine d_oacc_hll_sync + + subroutine d_oacc_hll_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_hll_to_host + + subroutine d_oacc_hll_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_d_oacc_hll_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 7d51766d..0dff0f27 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_d_oacc_vect_mod procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf procedure, pass(x) :: get_size => d_oacc_get_size + procedure, pass(x) :: dot_v => d_oacc_vect_dot procedure, pass(x) :: dot_a => d_oacc_dot_a procedure, pass(y) :: axpby_v => d_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_d_oacc_vect_mod end subroutine d_oacc_mlt_v end interface - interface subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_d_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine d_oacc_mlt_v_2 end interface - + contains subroutine d_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'd_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine d_oacc_sctb_x - - subroutine d_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine d_oacc_sctb - subroutine d_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'd_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'd_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_d_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_d_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine d_oacc_ins_v - - subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine d_oacc_ins_a - - subroutine d_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine d_oacc_bld_x - subroutine d_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine d_oacc_asb_m - - subroutine d_oacc_set_scal(x, val, first, last) class(psb_d_vect_oacc), intent(inout) :: x real(psb_dpk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine d_oacc_set_scal - - subroutine d_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function d_oacc_get_fmt + function d_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function d_oacc_vect_dot - - - function d_oacc_dot_a(n, x, y) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine d_oacc_vect_all - subroutine d_oacc_vect_free(x, info) implicit none class(psb_d_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 70fc325e..72e9ada2 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -1,455 +1,505 @@ module psb_i_oacc_vect_mod - use iso_c_binding - use psb_const_mod - use psb_error_mod - use psb_i_vect_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc - integer :: state = is_host - contains - procedure, pass(x) :: get_nrows => i_oacc_get_nrows - procedure, nopass :: get_fmt => i_oacc_get_fmt - - procedure, pass(x) :: all => i_oacc_all - procedure, pass(x) :: zero => i_oacc_zero - procedure, pass(x) :: asb_m => i_oacc_asb_m - procedure, pass(x) :: sync => i_oacc_sync - procedure, pass(x) :: sync_space => i_oacc_sync_space - procedure, pass(x) :: bld_x => i_oacc_bld_x - procedure, pass(x) :: bld_mn => i_oacc_bld_mn - procedure, pass(x) :: free => i_oacc_free - procedure, pass(x) :: ins_a => i_oacc_ins_a - procedure, pass(x) :: ins_v => i_oacc_ins_v - procedure, pass(x) :: is_host => i_oacc_is_host - procedure, pass(x) :: is_dev => i_oacc_is_dev - procedure, pass(x) :: is_sync => i_oacc_is_sync - procedure, pass(x) :: set_host => i_oacc_set_host - procedure, pass(x) :: set_dev => i_oacc_set_dev - procedure, pass(x) :: set_sync => i_oacc_set_sync - procedure, pass(x) :: set_scal => i_oacc_set_scal - procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x - procedure, pass(y) :: sctb => i_oacc_sctb - procedure, pass(y) :: sctb_x => i_oacc_sctb_x - procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf - procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf - - final :: i_oacc_vect_finalize - end type psb_i_vect_oacc - - public :: psb_i_vect_oacc_ - private :: constructor - interface psb_i_vect_oacc_ - module procedure constructor - end interface psb_i_vect_oacc_ + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_i_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => i_oacc_get_nrows + procedure, nopass :: get_fmt => i_oacc_get_fmt + + procedure, pass(x) :: all => i_oacc_vect_all + procedure, pass(x) :: zero => i_oacc_zero + procedure, pass(x) :: asb_m => i_oacc_asb_m + procedure, pass(x) :: sync => i_oacc_sync + procedure, pass(x) :: sync_space => i_oacc_sync_space + procedure, pass(x) :: bld_x => i_oacc_bld_x + procedure, pass(x) :: bld_mn => i_oacc_bld_mn + procedure, pass(x) :: free => i_oacc_vect_free + procedure, pass(x) :: ins_a => i_oacc_ins_a + procedure, pass(x) :: ins_v => i_oacc_ins_v + procedure, pass(x) :: is_host => i_oacc_is_host + procedure, pass(x) :: is_dev => i_oacc_is_dev + procedure, pass(x) :: is_sync => i_oacc_is_sync + procedure, pass(x) :: set_host => i_oacc_set_host + procedure, pass(x) :: set_dev => i_oacc_set_dev + procedure, pass(x) :: set_sync => i_oacc_set_sync + procedure, pass(x) :: set_scal => i_oacc_set_scal + + procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf + procedure, pass(y) :: sctb => i_oacc_sctb + procedure, pass(y) :: sctb_x => i_oacc_sctb_x + procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf + + procedure, pass(x) :: get_size => i_oacc_get_size + + + end type psb_i_vect_oacc + contains - function constructor(x) result(this) - integer(psb_ipk_) :: x(:) - type(psb_i_vect_oacc) :: this - integer(psb_ipk_) :: info - this%v = x - call this%asb(size(x), info) - end function constructor + subroutine i_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if - subroutine i_oacc_gthzv_x(i, n, idx, x, y) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - integer(psb_ipk_) :: y(:) - class(psb_i_vect_oacc) :: x - integer :: info + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + if (y%is_host()) call y%sync_space() - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do - end subroutine i_oacc_gthzv_x + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do - subroutine i_oacc_gthzbuf(i, n, idx, x) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - class(psb_i_vect_oacc) :: x - integer :: info + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine i_oacc_sctb_buf - if (.not.allocated(x%combuf)) then - call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') - return - end if + subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info, ni - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do - end subroutine i_oacc_gthzbuf + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'i_oacc_sctb_x') + return + end select - subroutine i_oacc_sctb(n, idx, x, beta, y) - implicit none - integer(psb_ipk_) :: n, idx(:) - integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_oacc) :: y - integer(psb_ipk_) :: info - integer :: i + if (y%is_host()) call y%sync_space() - if (n == 0) return + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + call y%set_dev() + end subroutine i_oacc_sctb_x + + subroutine i_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_ipk_) :: beta, x(:) + class(psb_i_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine i_oacc_sctb + + subroutine i_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_i_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'i_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine i_oacc_gthzbuf + + subroutine i_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + integer(psb_ipk_) :: y(:) + class(psb_i_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'i_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine i_oacc_gthzv_x + + subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_i_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_i_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space() + if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n - y%v(idx(i)) = beta * y%v(idx(i)) + x(i) - end do - end subroutine i_oacc_sctb - - subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - integer(psb_ipk_) :: beta, x(:) - class(psb_i_vect_oacc) :: y - integer :: info - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - if (y%is_host()) call y%sync_space(info) - - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + x(i) - end do - - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - end select - end subroutine i_oacc_sctb_x - - subroutine i_oacc_sctb_buf(i, n, idx, beta, y) - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - integer(psb_ipk_) :: beta - class(psb_i_vect_oacc) :: y - integer(psb_ipk_) :: info - - if (.not.allocated(y%v)) then - call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') - return - end if - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%v(i) - end do - end subroutine i_oacc_sctb_buf - - subroutine i_oacc_set_host(x) - class(psb_i_vect_oacc), intent(inout) :: x - x%state = is_host - end subroutine i_oacc_set_host - - subroutine i_oacc_set_sync(x) - class(psb_i_vect_oacc), intent(inout) :: x - x%state = is_sync - end subroutine i_oacc_set_sync - - subroutine i_oacc_set_dev(x) - class(psb_i_vect_oacc), intent(inout) :: x - x%state = is_dev - end subroutine i_oacc_set_dev - - subroutine i_oacc_set_scal(x, val, first, last) - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: val - integer(psb_ipk_), optional :: first, last - - integer(psb_ipk_) :: first_, last_ - - first_ = 1 - last_ = size(x%v) - if (present(first)) first_ = max(1, first) - if (present(last)) last_ = min(size(x%v), last) - - !$acc parallel loop - do i = first_, last_ - x%v(i) = val + x%v(virl%v(i)) = vval%v(i) end do call x%set_dev() - end subroutine i_oacc_set_scal - - function i_oacc_is_host(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - logical :: res - - res = (x%state == is_host) - end function i_oacc_is_host - - function i_oacc_is_dev(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - logical :: res - - res = (x%state == is_dev) - end function i_oacc_is_dev - - function i_oacc_is_sync(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - logical :: res - - res = (x%state == is_sync) - end function i_oacc_is_sync - - subroutine i_oacc_free(x, info) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - - info = 0 - if (allocated(x%v)) deallocate(x%v, stat=info) - if (info /= 0) then - info = psb_err_alloc_dealloc_ - call psb_errpush(info, 'i_oacc_free') - end if - call x%set_sync() - end subroutine i_oacc_free - - subroutine i_oacc_ins_a(n, irl, val, dupl, x, info) - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - integer(psb_ipk_), intent(in) :: irl(:) - integer(psb_ipk_), intent(in) :: val(:) - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i - - info = 0 - if (x%is_dev()) call x%sync() - call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space() + end select + select type(vval => val) + type is (psb_i_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine i_oacc_ins_v + + subroutine i_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_ipk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine i_oacc_ins_a + + subroutine i_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine i_oacc_bld_mn + + + subroutine i_oacc_bld_x(x, this) + use psb_base_mod + implicit none + integer(psb_ipk_), intent(in) :: this(:) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine i_oacc_bld_x + + subroutine i_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_i_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() call x%set_host() - end subroutine i_oacc_ins_a - - subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - class(psb_i_base_vect_type), intent(inout) :: irl - class(psb_i_base_vect_type), intent(inout) :: val - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i, isz - logical :: done_oacc - - info = 0 - if (psb_errstatus_fatal()) return - - done_oacc = .false. - select type(virl => irl) - class is (psb_i_vect_oacc) - select type(vval => val) - class is (psb_i_vect_oacc) - if (vval%is_host()) call vval%sync() - if (virl%is_host()) call virl%sync() - if (x%is_host()) call x%sync() - ! Add the OpenACC kernel call here if needed - call x%set_dev() - done_oacc = .true. - end select - end select - - if (.not.done_oacc) then - if (irl%is_dev()) call irl%sync() - if (val%is_dev()) call val%sync() - call x%ins(n, irl%v, val%v, dupl, info) - end if - - if (info /= 0) then - call psb_errpush(info,'i_oacc_ins_v') - return - end if - end subroutine i_oacc_ins_v - - subroutine i_oacc_bld_x(x, this) - use psb_error_mod - implicit none - integer(psb_ipk_), intent(in) :: this(:) - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call psb_realloc(size(this), x%v, info) - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'i_oacc_bld_x', i_err = (/size(this), izero, izero, izero, izero/)) - end if - x%v(:) = this(:) + end if + else + if (size(x%v) < n) then + call x%psb_i_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() call x%set_host() - call x%sync() - end subroutine i_oacc_bld_x - - subroutine i_oacc_bld_mn(x, n) - use psb_error_mod - implicit none - integer(psb_mpk_), intent(in) :: n - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call x%all(n, info) - if (info /= 0) then - call psb_errpush(info, 'i_oacc_bld_mn', i_err = (/n, n, n, n, n/)) - end if - end subroutine i_oacc_bld_mn - - subroutine i_oacc_sync(x) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: n, info - - info = 0 - if (x%is_host()) then - n = size(x%v) - if (.not.allocated(x%v)) then - write(*, *) 'Incoherent situation : x%v not allocated' - call psb_realloc(n, x%v, info) - end if - if ((n > size(x%v)) .or. (n > x%get_nrows())) then - write(*, *) 'Incoherent situation : sizes', n, size(x%v), x%get_nrows() - call psb_realloc(n, x%v, info) - end if - !$acc update device(x%v) - else if (x%is_dev()) then - n = size(x%v) - if (.not.allocated(x%v)) then - write(*, *) 'Incoherent situation : x%v not allocated' - call psb_realloc(n, x%v, info) - end if - !$acc update self(x%v) - end if - if (info == 0) call x%set_sync() - if (info /= 0) then - info = psb_err_internal_error_ - call psb_errpush(info, 'i_oacc_sync') - end if - end subroutine i_oacc_sync - - subroutine i_oacc_sync_space(x, info) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nh, nd - - info = 0 - if (x%is_dev()) then - nh = size(x%v) - nd = nh - if (nh < nd) then - call psb_realloc(nd, x%v, info) - end if - else - nh = size(x%v) - nd = nh - if (nh < nd) then - call psb_realloc(nd, x%v, info) - end if - end if - end subroutine i_oacc_sync_space - - function i_oacc_get_nrows(x) result(res) - implicit none - class(psb_i_vect_oacc), intent(in) :: x - integer(psb_ipk_) :: res - - res = 0 - if (allocated(x%v)) res = size(x%v) - end function i_oacc_get_nrows - - function i_oacc_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'iOACC' - end function i_oacc_get_fmt - - subroutine i_oacc_all(n, x, info) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(out) :: x - integer(psb_ipk_), intent(in) :: n - integer(psb_ipk_), intent(out) :: info - - call psb_realloc(n, x%v, info) - if (info == 0) call x%set_host() - if (info == 0) call x%sync_space(info) - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'i_oacc_all', i_err=(/n, n, n, n, n/)) - end if - end subroutine i_oacc_all - - subroutine i_oacc_zero(x) - use psb_error_mod - implicit none - class(psb_i_vect_oacc), intent(inout) :: x - ! Ensure zeroing on the GPU side - call x%set_dev() - x%v = 0 - !$acc update device(x%v) - end subroutine i_oacc_zero - - subroutine i_oacc_asb_m(n, x, info) - use psb_error_mod - use psb_realloc_mod - implicit none - integer(psb_ipk_), intent(in) :: n - class(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nh, nd - - info = 0 - if (x%is_dev()) then - nd = size(x%v) - if (nd < n) then - call x%sync() - call x%psb_i_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space(info) - call x%set_host() - end if - else - nh = size(x%v) - if (nh < n) then - call x%psb_i_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space(info) - call x%set_host() - end if - end if - end subroutine i_oacc_asb_m - - subroutine i_oacc_vect_finalize(x) - use psi_serial_mod - use psb_realloc_mod - implicit none - type(psb_i_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - info = 0 - call x%free(info) - end subroutine i_oacc_vect_finalize + end if + end if + end subroutine i_oacc_asb_m -end module psb_i_oacc_vect_mod + subroutine i_oacc_set_scal(x, val, first, last) + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine i_oacc_set_scal + + subroutine i_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(izero) + end subroutine i_oacc_zero + + function i_oacc_get_nrows(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function i_oacc_get_nrows + + function i_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "iOACC" + + end function i_oacc_get_fmt + + ! subroutine i_oacc_set_vect(x,y) + ! implicit none + ! class(psb_i_vect_oacc), intent(inout) :: x + ! integer(psb_ipk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine i_oacc_set_vect - - - - - \ No newline at end of file + subroutine i_oacc_to_dev(v) + implicit none + integer(psb_ipk_) :: v(:) + !$acc update device(v) + end subroutine i_oacc_to_dev + + subroutine i_oacc_to_host(v) + implicit none + integer(psb_ipk_) :: v(:) + !$acc update self(v) + end subroutine i_oacc_to_host + + subroutine i_oacc_sync_space(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call i_oacc_create_dev(x%v) + end if + contains + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine i_oacc_sync_space + + subroutine i_oacc_sync(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call i_oacc_to_host(x%v) + end if + if (x%is_host()) then + call i_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine i_oacc_sync + + subroutine i_oacc_set_host(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine i_oacc_set_host + + subroutine i_oacc_set_dev(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine i_oacc_set_dev + + subroutine i_oacc_set_sync(x) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine i_oacc_set_sync + + function i_oacc_is_dev(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function i_oacc_is_dev + + function i_oacc_is_host(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function i_oacc_is_host + + function i_oacc_is_sync(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function i_oacc_is_sync + + subroutine i_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_i_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'i_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine i_oacc_vect_all + + subroutine i_oacc_vect_free(x, info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine i_oacc_vect_free + + function i_oacc_get_size(x) result(res) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function i_oacc_get_size + +end module psb_i_oacc_vect_mod diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 new file mode 100644 index 00000000..aeba4537 --- /dev/null +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -0,0 +1,507 @@ +module psb_l_oacc_vect_mod + use iso_c_binding + use psb_const_mod + use psb_error_mod + use psb_l_vect_mod + use psb_i_vect_mod + use psb_i_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_l_base_vect_type) :: psb_l_vect_oacc + integer :: state = is_host + + contains + procedure, pass(x) :: get_nrows => l_oacc_get_nrows + procedure, nopass :: get_fmt => l_oacc_get_fmt + + procedure, pass(x) :: all => l_oacc_vect_all + procedure, pass(x) :: zero => l_oacc_zero + procedure, pass(x) :: asb_m => l_oacc_asb_m + procedure, pass(x) :: sync => l_oacc_sync + procedure, pass(x) :: sync_space => l_oacc_sync_space + procedure, pass(x) :: bld_x => l_oacc_bld_x + procedure, pass(x) :: bld_mn => l_oacc_bld_mn + procedure, pass(x) :: free => l_oacc_vect_free + procedure, pass(x) :: ins_a => l_oacc_ins_a + procedure, pass(x) :: ins_v => l_oacc_ins_v + procedure, pass(x) :: is_host => l_oacc_is_host + procedure, pass(x) :: is_dev => l_oacc_is_dev + procedure, pass(x) :: is_sync => l_oacc_is_sync + procedure, pass(x) :: set_host => l_oacc_set_host + procedure, pass(x) :: set_dev => l_oacc_set_dev + procedure, pass(x) :: set_sync => l_oacc_set_sync + procedure, pass(x) :: set_scal => l_oacc_set_scal + + procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf + procedure, pass(y) :: sctb => l_oacc_sctb + procedure, pass(y) :: sctb_x => l_oacc_sctb_x + procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf + + procedure, pass(x) :: get_size => l_oacc_get_size + + + end type psb_l_vect_oacc + + +contains + + + subroutine l_oacc_sctb_buf(i, n, idx, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_lpk_) :: beta + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info + + if (.not.allocated(y%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + end do + + class default + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + end do + end select + end subroutine l_oacc_sctb_buf + + subroutine l_oacc_sctb_x(i, n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_):: i, n + class(psb_i_base_vect_type) :: idx + integer(psb_lpk_) :: beta, x(:) + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info, ni + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'l_oacc_sctb_x') + return + end select + + if (y%is_host()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) + end do + + call y%set_dev() + end subroutine l_oacc_sctb_x + + subroutine l_oacc_sctb(n, idx, x, beta, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: n + integer(psb_ipk_) :: idx(:) + integer(psb_lpk_) :: beta, x(:) + class(psb_l_vect_oacc) :: y + integer(psb_ipk_) :: info + integer(psb_ipk_) :: i + + if (n == 0) return + if (y%is_dev()) call y%sync_space() + + !$acc parallel loop + do i = 1, n + y%v(idx(i)) = beta * y%v(idx(i)) + x(i) + end do + + call y%set_host() + end subroutine l_oacc_sctb + + subroutine l_oacc_gthzbuf(i, n, idx, x) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type) :: idx + class(psb_l_vect_oacc) :: x + integer(psb_ipk_) :: info + + info = 0 + if (.not.allocated(x%combuf)) then + call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') + return + end if + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'l_oacc_gthzbuf') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + x%combuf(i) = x%v(idx%v(i)) + end do + end subroutine l_oacc_gthzbuf + + subroutine l_oacc_gthzv_x(i, n, idx, x, y) + use psb_base_mod + implicit none + integer(psb_ipk_) :: i, n + class(psb_i_base_vect_type):: idx + integer(psb_lpk_) :: y(:) + class(psb_l_vect_oacc):: x + integer(psb_ipk_) :: info + + info = 0 + + select type(ii => idx) + class is (psb_i_vect_oacc) + if (ii%is_host()) call ii%sync_space() + class default + call psb_errpush(info, 'l_oacc_gthzv_x') + return + end select + + if (x%is_host()) call x%sync_space() + + !$acc parallel loop + do i = 1, n + y(i) = x%v(idx%v(i)) + end do + end subroutine l_oacc_gthzv_x + + subroutine l_oacc_ins_v(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + class(psb_i_base_vect_type), intent(inout) :: irl + class(psb_l_base_vect_type), intent(inout) :: val + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, isz + logical :: done_oacc + + info = 0 + if (psb_errstatus_fatal()) return + + done_oacc = .false. + select type(virl => irl) + type is (psb_i_vect_oacc) + select type(vval => val) + type is (psb_l_vect_oacc) + if (vval%is_host()) call vval%sync_space() + if (virl%is_host()) call virl%sync_space() + if (x%is_host()) call x%sync_space() + !$acc parallel loop + do i = 1, n + x%v(virl%v(i)) = vval%v(i) + end do + call x%set_dev() + done_oacc = .true. + end select + end select + + if (.not.done_oacc) then + select type(virl => irl) + type is (psb_i_vect_oacc) + if (virl%is_dev()) call virl%sync_space() + end select + select type(vval => val) + type is (psb_l_vect_oacc) + if (vval%is_dev()) call vval%sync_space() + end select + call x%ins(n, irl%v, val%v, dupl, info) + end if + + if (info /= 0) then + call psb_errpush(info, 'oacc_vect_ins') + return + end if + + end subroutine l_oacc_ins_v + + subroutine l_oacc_ins_a(n, irl, val, dupl, x, info) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n, dupl + integer(psb_ipk_), intent(in) :: irl(:) + integer(psb_lpk_), intent(in) :: val(:) + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i + + info = 0 + if (x%is_dev()) call x%sync_space() + call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info) + call x%set_host() + !$acc update device(x%v) + + end subroutine l_oacc_ins_a + + subroutine l_oacc_bld_mn(x, n) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call x%all(n, info) + if (info /= 0) then + call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/)) + end if + call x%set_host() + !$acc update device(x%v) + + end subroutine l_oacc_bld_mn + + + subroutine l_oacc_bld_x(x, this) + use psb_base_mod + implicit none + integer(psb_lpk_), intent(in) :: this(:) + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + + call psb_realloc(size(this), x%v, info) + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'l_oacc_bld_x', & + i_err=(/size(this), izero, izero, izero, izero/)) + return + end if + + x%v(:) = this(:) + call x%set_host() + !$acc update device(x%v) + + end subroutine l_oacc_bld_x + + subroutine l_oacc_asb_m(n, x, info) + use psb_base_mod + implicit none + integer(psb_mpk_), intent(in) :: n + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + integer(psb_mpk_) :: nd + + info = psb_success_ + + if (x%is_dev()) then + nd = size(x%v) + if (nd < n) then + call x%sync() + call x%psb_l_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + else + if (size(x%v) < n) then + call x%psb_l_base_vect_type%asb(n, info) + if (info == psb_success_) call x%sync_space() + call x%set_host() + end if + end if + end subroutine l_oacc_asb_m + + subroutine l_oacc_set_scal(x, val, first, last) + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_lpk_), intent(in) :: val + integer(psb_ipk_), optional :: first, last + + integer(psb_ipk_) :: first_, last_ + first_ = 1 + last_ = x%get_nrows() + if (present(first)) first_ = max(1, first) + if (present(last)) last_ = min(last, last_) + + !$acc parallel loop + do i = first_, last_ + x%v(i) = val + end do + !$acc end parallel loop + + call x%set_dev() + end subroutine l_oacc_set_scal + + subroutine l_oacc_zero(x) + use psi_serial_mod + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + call x%set_dev() + call x%set_scal(lzero) + end subroutine l_oacc_zero + + function l_oacc_get_nrows(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + integer(psb_ipk_) :: res + + if (allocated(x%v)) res = size(x%v) + end function l_oacc_get_nrows + + function l_oacc_get_fmt() result(res) + implicit none + character(len=5) :: res + res = "lOACC" + + end function l_oacc_get_fmt + + ! subroutine l_oacc_set_vect(x,y) + ! implicit none + ! class(psb_l_vect_oacc), intent(inout) :: x + ! integer(psb_lpk_), intent(in) :: y(:) + ! integer(psb_ipk_) :: info + + ! if (size(x%v) /= size(y)) then + ! call x%free(info) + ! call x%all(size(y),info) + ! end if + ! x%v(:) = y(:) + ! call x%set_host() + ! end subroutine l_oacc_set_vect + + subroutine l_oacc_to_dev(v) + implicit none + integer(psb_lpk_) :: v(:) + !$acc update device(v) + end subroutine l_oacc_to_dev + + subroutine l_oacc_to_host(v) + implicit none + integer(psb_lpk_) :: v(:) + !$acc update self(v) + end subroutine l_oacc_to_host + + subroutine l_oacc_sync_space(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + if (allocated(x%v)) then + call l_oacc_create_dev(x%v) + end if + contains + subroutine l_oacc_create_dev(v) + implicit none + integer(psb_lpk_) :: v(:) + !$acc enter data copyin(v) + end subroutine l_oacc_create_dev + end subroutine l_oacc_sync_space + + subroutine l_oacc_sync(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + if (x%is_dev()) then + call l_oacc_to_host(x%v) + end if + if (x%is_host()) then + call l_oacc_to_dev(x%v) + end if + call x%set_sync() + end subroutine l_oacc_sync + + subroutine l_oacc_set_host(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + + x%state = is_host + end subroutine l_oacc_set_host + + subroutine l_oacc_set_dev(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + + x%state = is_dev + end subroutine l_oacc_set_dev + + subroutine l_oacc_set_sync(x) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + + x%state = is_sync + end subroutine l_oacc_set_sync + + function l_oacc_is_dev(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_dev) + end function l_oacc_is_dev + + function l_oacc_is_host(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_host) + end function l_oacc_is_host + + function l_oacc_is_sync(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(in) :: x + logical :: res + + res = (x%state == is_sync) + end function l_oacc_is_sync + + subroutine l_oacc_vect_all(n, x, info) + use psi_serial_mod + use psb_realloc_mod + implicit none + integer(psb_ipk_), intent(in) :: n + class(psb_l_vect_oacc), intent(out) :: x + integer(psb_ipk_), intent(out) :: info + + call psb_realloc(n, x%v, info) + if (info == 0) then + call x%set_host() + !$acc enter data create(x%v) + call x%sync_space() + end if + if (info /= 0) then + info = psb_err_alloc_request_ + call psb_errpush(info, 'l_oacc_all', & + i_err=(/n, n, n, n, n/)) + end if + end subroutine l_oacc_vect_all + + subroutine l_oacc_vect_free(x, info) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + info = 0 + if (allocated(x%v)) then + !$acc exit data delete(x%v) finalize + deallocate(x%v, stat=info) + end if + + end subroutine l_oacc_vect_free + + function l_oacc_get_size(x) result(res) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: res + + if (x%is_dev()) call x%sync() + res = size(x%v) + end function l_oacc_get_size + +end module psb_l_oacc_vect_mod diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90 index 2d8e8b40..7d3f9406 100644 --- a/openacc/psb_oacc_mod.F90 +++ b/openacc/psb_oacc_mod.F90 @@ -4,6 +4,7 @@ module psb_oacc_mod use psb_oacc_env_mod use psb_i_oacc_vect_mod + use psb_l_oacc_vect_mod use psb_s_oacc_vect_mod use psb_d_oacc_vect_mod use psb_c_oacc_vect_mod @@ -13,5 +14,13 @@ module psb_oacc_mod use psb_d_oacc_csr_mat_mod use psb_c_oacc_csr_mat_mod use psb_z_oacc_csr_mat_mod + use psb_s_oacc_ell_mat_mod + use psb_d_oacc_ell_mat_mod + use psb_c_oacc_ell_mat_mod + use psb_z_oacc_ell_mat_mod + use psb_s_oacc_hll_mat_mod + use psb_d_oacc_hll_mat_mod + use psb_c_oacc_hll_mat_mod + use psb_z_oacc_hll_mat_mod end module psb_oacc_mod diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..541fdf9a --- /dev/null +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -0,0 +1,341 @@ +module psb_s_oacc_ell_mat_mod + use iso_c_binding + use psb_s_mat_mod + use psb_s_ell_mat_mod + use psb_s_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_ell_sparse_mat) :: psb_s_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => s_oacc_ell_sizeof + procedure, pass(a) :: is_host => s_oacc_ell_is_host + procedure, pass(a) :: is_sync => s_oacc_ell_is_sync + procedure, pass(a) :: is_dev => s_oacc_ell_is_dev + procedure, pass(a) :: set_host => s_oacc_ell_set_host + procedure, pass(a) :: set_sync => s_oacc_ell_set_sync + procedure, pass(a) :: set_dev => s_oacc_ell_set_dev + procedure, pass(a) :: sync_space => s_oacc_ell_sync_space + procedure, pass(a) :: sync => s_oacc_ell_sync + procedure, pass(a) :: free => s_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv + procedure, pass(a) :: scals => psb_s_oacc_ell_scals + procedure, pass(a) :: scalv => psb_s_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_ell_mold + + end type psb_s_oacc_ell_sparse_mat + + interface + module subroutine psb_s_oacc_ell_mold(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mold + end interface + + interface + module subroutine psb_s_oacc_ell_cp_from_fmt(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_ell_mv_from_coo(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_ell_mv_from_fmt(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_csmm + end interface + + interface + module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_ell_csmv + end interface + + interface + module subroutine psb_s_oacc_ell_scals(d, a, info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_scals + end interface + + interface + module subroutine psb_s_oacc_ell_scal(d,a,info,side) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_ell_scal + end interface + + interface + module subroutine psb_s_oacc_ell_reallocate_nz(nz,a) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_ell_cp_from_coo(a,b,info) + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_ell_cp_from_coo + end interface + +contains + + subroutine s_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_s_ell_sparse_mat%free() + + return + end subroutine s_oacc_ell_free + + + function s_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function s_oacc_ell_sizeof + + subroutine s_oacc_ell_sync_space(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call s_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine s_oacc_create_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine s_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine s_oacc_ell_sync_space + + + + function s_oacc_ell_is_host(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_ell_is_host + + function s_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_ell_is_sync + + function s_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_ell_is_dev + + subroutine s_oacc_ell_set_host(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_ell_set_host + + subroutine s_oacc_ell_set_sync(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_ell_set_sync + + subroutine s_oacc_ell_set_dev(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_ell_set_dev + + function s_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function s_oacc_ell_get_fmt + + subroutine s_oacc_ell_sync(a) + implicit none + class(psb_s_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call s_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call s_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine s_oacc_ell_sync + + subroutine s_oacc_ell_to_host(v) + implicit none + real(psb_spk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine s_oacc_ell_to_host + + subroutine s_oacc_ell_to_host_scalar(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine s_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_s_oacc_ell_mat_mod diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..bf8949a1 --- /dev/null +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_s_oacc_hll_mat_mod + use iso_c_binding + use psb_s_mat_mod + use psb_s_hll_mat_mod + use psb_s_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_s_hll_sparse_mat) :: psb_s_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => s_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => s_oacc_hll_sizeof + procedure, pass(a) :: is_host => s_oacc_hll_is_host + procedure, pass(a) :: is_sync => s_oacc_hll_is_sync + procedure, pass(a) :: is_dev => s_oacc_hll_is_dev + procedure, pass(a) :: set_host => s_oacc_hll_set_host + procedure, pass(a) :: set_sync => s_oacc_hll_set_sync + procedure, pass(a) :: set_dev => s_oacc_hll_set_dev + procedure, pass(a) :: sync_space => s_oacc_hll_sync_space + procedure, pass(a) :: sync => s_oacc_hll_sync + procedure, pass(a) :: free => s_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv + procedure, pass(a) :: scals => psb_s_oacc_hll_scals + procedure, pass(a) :: scalv => psb_s_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_hll_mold + + end type psb_s_oacc_hll_sparse_mat + + interface + module subroutine psb_s_oacc_hll_mold(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + class(psb_s_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mold + end interface + + interface + module subroutine psb_s_oacc_hll_cp_from_fmt(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_s_oacc_hll_mv_from_coo(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_s_oacc_hll_mv_from_fmt(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta + class(psb_s_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:,:) + real(psb_spk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_csmm + end interface + + interface + module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + real(psb_spk_), intent(in) :: alpha, beta, x(:) + real(psb_spk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_s_oacc_hll_csmv + end interface + + interface + module subroutine psb_s_oacc_hll_scals(d, a, info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_scals + end interface + + interface + module subroutine psb_s_oacc_hll_scal(d,a,info,side) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_spk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_s_oacc_hll_scal + end interface + + interface + module subroutine psb_s_oacc_hll_reallocate_nz(nz,a) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_s_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_s_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_s_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_s_oacc_hll_cp_from_coo(a,b,info) + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_s_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_s_oacc_hll_cp_from_coo + end interface + +contains + + subroutine s_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_s_hll_sparse_mat%free() + + return + end subroutine s_oacc_hll_free + + function s_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function s_oacc_hll_sizeof + + + + function s_oacc_hll_is_host(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function s_oacc_hll_is_host + + function s_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function s_oacc_hll_is_sync + + function s_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function s_oacc_hll_is_dev + + subroutine s_oacc_hll_set_host(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine s_oacc_hll_set_host + + subroutine s_oacc_hll_set_sync(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine s_oacc_hll_set_sync + + subroutine s_oacc_hll_set_dev(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine s_oacc_hll_set_dev + + function s_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function s_oacc_hll_get_fmt + + subroutine s_oacc_hll_sync_space(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call s_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine s_oacc_create_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine s_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine s_oacc_hll_sync_space + + + subroutine s_oacc_hll_sync(a) + implicit none + class(psb_s_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_s_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call s_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call s_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine s_oacc_hll_sync + + subroutine s_oacc_hll_to_host(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine s_oacc_hll_to_host + + subroutine s_oacc_hll_to_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine s_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_s_oacc_hll_mat_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 36ae7da8..5c34827d 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_s_oacc_vect_mod procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf procedure, pass(x) :: get_size => s_oacc_get_size + procedure, pass(x) :: dot_v => s_oacc_vect_dot procedure, pass(x) :: dot_a => s_oacc_dot_a procedure, pass(y) :: axpby_v => s_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_s_oacc_vect_mod end subroutine s_oacc_mlt_v end interface - interface subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_s_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine s_oacc_mlt_v_2 end interface - + contains subroutine s_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 's_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine s_oacc_sctb_x - - subroutine s_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine s_oacc_sctb - subroutine s_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 's_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 's_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_s_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_s_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine s_oacc_ins_v - - subroutine s_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine s_oacc_ins_a - - subroutine s_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine s_oacc_bld_x - subroutine s_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine s_oacc_asb_m - - subroutine s_oacc_set_scal(x, val, first, last) class(psb_s_vect_oacc), intent(inout) :: x real(psb_spk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine s_oacc_set_scal - - subroutine s_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function s_oacc_get_fmt + function s_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function s_oacc_vect_dot - - - function s_oacc_dot_a(n, x, y) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine s_oacc_vect_all - subroutine s_oacc_vect_free(x, info) implicit none class(psb_s_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 new file mode 100644 index 00000000..8bf8c9fa --- /dev/null +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -0,0 +1,341 @@ +module psb_z_oacc_ell_mat_mod + use iso_c_binding + use psb_z_mat_mod + use psb_z_ell_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_ell_sparse_mat) :: psb_z_oacc_ell_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => z_oacc_ell_sizeof + procedure, pass(a) :: is_host => z_oacc_ell_is_host + procedure, pass(a) :: is_sync => z_oacc_ell_is_sync + procedure, pass(a) :: is_dev => z_oacc_ell_is_dev + procedure, pass(a) :: set_host => z_oacc_ell_set_host + procedure, pass(a) :: set_sync => z_oacc_ell_set_sync + procedure, pass(a) :: set_dev => z_oacc_ell_set_dev + procedure, pass(a) :: sync_space => z_oacc_ell_sync_space + procedure, pass(a) :: sync => z_oacc_ell_sync + procedure, pass(a) :: free => z_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv + procedure, pass(a) :: scals => psb_z_oacc_ell_scals + procedure, pass(a) :: scalv => psb_z_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_ell_mold + + end type psb_z_oacc_ell_sparse_mat + + interface + module subroutine psb_z_oacc_ell_mold(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mold + end interface + + interface + module subroutine psb_z_oacc_ell_cp_from_fmt(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_ell_mv_from_coo(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_ell_mv_from_fmt(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_csmm + end interface + + interface + module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_ell_csmv + end interface + + interface + module subroutine psb_z_oacc_ell_scals(d, a, info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_scals + end interface + + interface + module subroutine psb_z_oacc_ell_scal(d,a,info,side) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_ell_scal + end interface + + interface + module subroutine psb_z_oacc_ell_reallocate_nz(nz,a) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_ell_cp_from_coo(a,b,info) + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_ell_cp_from_coo + end interface + +contains + + subroutine z_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + + call a%psb_z_ell_sparse_mat%free() + + return + end subroutine z_oacc_ell_free + + + function z_oacc_ell_sizeof(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + + end function z_oacc_ell_sizeof + + subroutine z_oacc_ell_sync_space(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call z_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + + contains + subroutine z_oacc_create_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine z_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + end subroutine z_oacc_ell_sync_space + + + + function z_oacc_ell_is_host(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_ell_is_host + + function z_oacc_ell_is_sync(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_ell_is_sync + + function z_oacc_ell_is_dev(a) result(res) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_ell_is_dev + + subroutine z_oacc_ell_set_host(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_ell_set_host + + subroutine z_oacc_ell_set_sync(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_ell_set_sync + + subroutine z_oacc_ell_set_dev(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_ell_set_dev + + function z_oacc_ell_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'ELL_oacc' + end function z_oacc_ell_get_fmt + + subroutine z_oacc_ell_sync(a) + implicit none + class(psb_z_oacc_ell_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_ell_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call z_oacc_ell_to_host(a%val) + call i_oacc_ell_to_host(a%ja) + call i_oacc_ell_to_host_scalar(a%irn) + call i_oacc_ell_to_host_scalar(a%idiag) + else if (a%is_host()) then + call z_oacc_ell_to_dev(a%val) + call i_oacc_ell_to_dev(a%ja) + call i_oacc_ell_to_dev_scalar(a%irn) + call i_oacc_ell_to_dev_scalar(a%idiag) + end if + call tmpa%set_sync() + end subroutine z_oacc_ell_sync + + subroutine z_oacc_ell_to_host(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine z_oacc_ell_to_host + + subroutine z_oacc_ell_to_host_scalar(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine z_oacc_ell_to_host_scalar + + subroutine i_oacc_ell_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev + + subroutine i_oacc_ell_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_ell_to_dev_scalar + + subroutine i_oacc_ell_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:,:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host + + subroutine i_oacc_ell_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_ell_to_host_scalar +end module psb_z_oacc_ell_mat_mod diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..e6a4929a --- /dev/null +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_z_oacc_hll_mat_mod + use iso_c_binding + use psb_z_mat_mod + use psb_z_hll_mat_mod + use psb_z_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_z_hll_sparse_mat) :: psb_z_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => z_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => z_oacc_hll_sizeof + procedure, pass(a) :: is_host => z_oacc_hll_is_host + procedure, pass(a) :: is_sync => z_oacc_hll_is_sync + procedure, pass(a) :: is_dev => z_oacc_hll_is_dev + procedure, pass(a) :: set_host => z_oacc_hll_set_host + procedure, pass(a) :: set_sync => z_oacc_hll_set_sync + procedure, pass(a) :: set_dev => z_oacc_hll_set_dev + procedure, pass(a) :: sync_space => z_oacc_hll_sync_space + procedure, pass(a) :: sync => z_oacc_hll_sync + procedure, pass(a) :: free => z_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv + procedure, pass(a) :: scals => psb_z_oacc_hll_scals + procedure, pass(a) :: scalv => psb_z_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_hll_mold + + end type psb_z_oacc_hll_sparse_mat + + interface + module subroutine psb_z_oacc_hll_mold(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + class(psb_z_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mold + end interface + + interface + module subroutine psb_z_oacc_hll_cp_from_fmt(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_z_oacc_hll_mv_from_coo(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_z_oacc_hll_mv_from_fmt(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta + class(psb_z_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + complex(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_csmm + end interface + + interface + module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + complex(psb_dpk_), intent(in) :: alpha, beta, x(:) + complex(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_z_oacc_hll_csmv + end interface + + interface + module subroutine psb_z_oacc_hll_scals(d, a, info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_scals + end interface + + interface + module subroutine psb_z_oacc_hll_scal(d,a,info,side) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + complex(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_z_oacc_hll_scal + end interface + + interface + module subroutine psb_z_oacc_hll_reallocate_nz(nz,a) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_z_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_z_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_z_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_z_oacc_hll_cp_from_coo(a,b,info) + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_z_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_z_oacc_hll_cp_from_coo + end interface + +contains + + subroutine z_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_z_hll_sparse_mat%free() + + return + end subroutine z_oacc_hll_free + + function z_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function z_oacc_hll_sizeof + + + + function z_oacc_hll_is_host(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function z_oacc_hll_is_host + + function z_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function z_oacc_hll_is_sync + + function z_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function z_oacc_hll_is_dev + + subroutine z_oacc_hll_set_host(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine z_oacc_hll_set_host + + subroutine z_oacc_hll_set_sync(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine z_oacc_hll_set_sync + + subroutine z_oacc_hll_set_dev(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine z_oacc_hll_set_dev + + function z_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function z_oacc_hll_get_fmt + + subroutine z_oacc_hll_sync_space(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call z_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine z_oacc_create_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine z_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine z_oacc_hll_sync_space + + + subroutine z_oacc_hll_sync(a) + implicit none + class(psb_z_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_z_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call z_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call z_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine z_oacc_hll_sync + + subroutine z_oacc_hll_to_host(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine z_oacc_hll_to_host + + subroutine z_oacc_hll_to_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine z_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_z_oacc_hll_mat_mod diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 5d03b49d..0bac854a 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -42,6 +42,7 @@ module psb_z_oacc_vect_mod procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf procedure, pass(x) :: get_size => z_oacc_get_size + procedure, pass(x) :: dot_v => z_oacc_vect_dot procedure, pass(x) :: dot_a => z_oacc_dot_a procedure, pass(y) :: axpby_v => z_oacc_axpby_v @@ -70,7 +71,6 @@ module psb_z_oacc_vect_mod end subroutine z_oacc_mlt_v end interface - interface subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import @@ -83,7 +83,7 @@ module psb_z_oacc_vect_mod character(len=1), intent(in), optional :: conjgx, conjgy end subroutine z_oacc_mlt_v_2 end interface - + contains subroutine z_oacc_absval1(x) @@ -432,7 +432,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() if (y%is_host()) call y%sync_space() !$acc parallel loop @@ -459,7 +459,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'z_oacc_sctb_x') return @@ -475,8 +475,6 @@ contains call y%set_dev() end subroutine z_oacc_sctb_x - - subroutine z_oacc_sctb(n, idx, x, beta, y) use psb_base_mod implicit none @@ -498,7 +496,6 @@ contains call y%set_host() end subroutine z_oacc_sctb - subroutine z_oacc_gthzbuf(i, n, idx, x) use psb_base_mod implicit none @@ -515,7 +512,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'z_oacc_gthzbuf') return @@ -542,7 +539,7 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) + if (ii%is_host()) call ii%sync_space() class default call psb_errpush(info, 'z_oacc_gthzv_x') return @@ -577,7 +574,7 @@ contains select type(vval => val) type is (psb_z_vect_oacc) if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) + if (virl%is_host()) call virl%sync_space() if (x%is_host()) call x%sync_space() !$acc parallel loop do i = 1, n @@ -591,7 +588,7 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) + if (virl%is_dev()) call virl%sync_space() end select select type(vval => val) type is (psb_z_vect_oacc) @@ -607,8 +604,6 @@ contains end subroutine z_oacc_ins_v - - subroutine z_oacc_ins_a(n, irl, val, dupl, x, info) use psi_serial_mod implicit none @@ -628,8 +623,6 @@ contains end subroutine z_oacc_ins_a - - subroutine z_oacc_bld_mn(x, n) use psb_base_mod implicit none @@ -668,7 +661,6 @@ contains end subroutine z_oacc_bld_x - subroutine z_oacc_asb_m(n, x, info) use psb_base_mod implicit none @@ -696,8 +688,6 @@ contains end if end subroutine z_oacc_asb_m - - subroutine z_oacc_set_scal(x, val, first, last) class(psb_z_vect_oacc), intent(inout) :: x complex(psb_dpk_), intent(in) :: val @@ -718,8 +708,6 @@ contains call x%set_dev() end subroutine z_oacc_set_scal - - subroutine z_oacc_zero(x) use psi_serial_mod implicit none @@ -743,6 +731,7 @@ contains end function z_oacc_get_fmt + function z_oacc_vect_dot(n, x, y) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -776,9 +765,6 @@ contains end function z_oacc_vect_dot - - - function z_oacc_dot_a(n, x, y) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -910,7 +896,6 @@ contains end if end subroutine z_oacc_vect_all - subroutine z_oacc_vect_free(x, info) implicit none class(psb_z_vect_oacc), intent(inout) :: x From 2709aa9f16b6b1614ecea1a3516d7d7a91651c80 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 8 Aug 2024 11:10:04 +0200 Subject: [PATCH 27/39] Fix upd_xyz name --- openacc/psb_c_oacc_vect_mod.F90 | 6 +++--- openacc/psb_d_oacc_vect_mod.F90 | 6 +++--- openacc/psb_s_oacc_vect_mod.F90 | 6 +++--- openacc/psb_z_oacc_vect_mod.F90 | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 9225f159..fc501e04 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -47,7 +47,7 @@ module psb_c_oacc_vect_mod procedure, pass(x) :: dot_a => c_oacc_dot_a procedure, pass(y) :: axpby_v => c_oacc_axpby_v procedure, pass(y) :: axpby_a => c_oacc_axpby_a - procedure, pass(z) :: abgdxyz => c_oacc_abgdxyz + procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz procedure, pass(y) :: mlt_a => c_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2 procedure, pass(y) :: mlt_v => c_oacc_mlt_v @@ -364,7 +364,7 @@ contains call y%set_host() end subroutine c_oacc_axpby_a - subroutine c_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + subroutine c_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -414,7 +414,7 @@ contains call y%axpby(m, alpha, x, beta, info) call z%axpby(m, gamma, y, delta, info) end if - end subroutine c_oacc_abgdxyz + end subroutine c_oacc_upd_xyz subroutine c_oacc_sctb_buf(i, n, idx, beta, y) use psb_base_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 0dff0f27..bfb97b5c 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -47,7 +47,7 @@ module psb_d_oacc_vect_mod procedure, pass(x) :: dot_a => d_oacc_dot_a procedure, pass(y) :: axpby_v => d_oacc_axpby_v procedure, pass(y) :: axpby_a => d_oacc_axpby_a - procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz + procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz procedure, pass(y) :: mlt_a => d_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 procedure, pass(y) :: mlt_v => d_oacc_mlt_v @@ -364,7 +364,7 @@ contains call y%set_host() end subroutine d_oacc_axpby_a - subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + subroutine d_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -414,7 +414,7 @@ contains call y%axpby(m, alpha, x, beta, info) call z%axpby(m, gamma, y, delta, info) end if - end subroutine d_oacc_abgdxyz + end subroutine d_oacc_upd_xyz subroutine d_oacc_sctb_buf(i, n, idx, beta, y) use psb_base_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 5c34827d..47922d6a 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -47,7 +47,7 @@ module psb_s_oacc_vect_mod procedure, pass(x) :: dot_a => s_oacc_dot_a procedure, pass(y) :: axpby_v => s_oacc_axpby_v procedure, pass(y) :: axpby_a => s_oacc_axpby_a - procedure, pass(z) :: abgdxyz => s_oacc_abgdxyz + procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz procedure, pass(y) :: mlt_a => s_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2 procedure, pass(y) :: mlt_v => s_oacc_mlt_v @@ -364,7 +364,7 @@ contains call y%set_host() end subroutine s_oacc_axpby_a - subroutine s_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + subroutine s_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -414,7 +414,7 @@ contains call y%axpby(m, alpha, x, beta, info) call z%axpby(m, gamma, y, delta, info) end if - end subroutine s_oacc_abgdxyz + end subroutine s_oacc_upd_xyz subroutine s_oacc_sctb_buf(i, n, idx, beta, y) use psb_base_mod diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 0bac854a..be03b1cd 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -47,7 +47,7 @@ module psb_z_oacc_vect_mod procedure, pass(x) :: dot_a => z_oacc_dot_a procedure, pass(y) :: axpby_v => z_oacc_axpby_v procedure, pass(y) :: axpby_a => z_oacc_axpby_a - procedure, pass(z) :: abgdxyz => z_oacc_abgdxyz + procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz procedure, pass(y) :: mlt_a => z_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2 procedure, pass(y) :: mlt_v => z_oacc_mlt_v @@ -364,7 +364,7 @@ contains call y%set_host() end subroutine z_oacc_axpby_a - subroutine z_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) + subroutine z_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -414,7 +414,7 @@ contains call y%axpby(m, alpha, x, beta, info) call z%axpby(m, gamma, y, delta, info) end if - end subroutine z_oacc_abgdxyz + end subroutine z_oacc_upd_xyz subroutine z_oacc_sctb_buf(i, n, idx, beta, y) use psb_base_mod From cf2cc6cab9af7178ba4da7d30a7ee47a3adc7044 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 8 Aug 2024 14:34:28 +0200 Subject: [PATCH 28/39] Precedence of oacc_vect modules --- openacc/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/openacc/Makefile b/openacc/Makefile index 035244d9..cdcc9f71 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -58,8 +58,9 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \ psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \ psb_oacc_env_mod.o -psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o +psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o\ + psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o +psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o From 82201407290cdbd35af47cbaae9f9ca7bf8d3a09 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 8 Aug 2024 14:59:09 +0200 Subject: [PATCH 29/39] Merged recent changes from development --- base/modules/auxil/psi_c_serial_mod.f90 | 8 ++++---- base/modules/auxil/psi_d_serial_mod.f90 | 8 ++++---- base/modules/auxil/psi_e_serial_mod.f90 | 8 ++++---- base/modules/auxil/psi_i2_serial_mod.f90 | 8 ++++---- base/modules/auxil/psi_m_serial_mod.f90 | 8 ++++---- base/modules/auxil/psi_s_serial_mod.f90 | 8 ++++---- base/modules/auxil/psi_z_serial_mod.f90 | 8 ++++---- base/modules/psblas/psb_c_psblas_mod.F90 | 8 ++++---- base/modules/psblas/psb_d_psblas_mod.F90 | 8 ++++---- base/modules/psblas/psb_s_psblas_mod.F90 | 8 ++++---- base/modules/psblas/psb_z_psblas_mod.F90 | 8 ++++---- base/modules/serial/psb_c_base_vect_mod.F90 | 14 +++++++------- base/modules/serial/psb_c_vect_mod.F90 | 9 +++++---- base/modules/serial/psb_d_base_vect_mod.F90 | 14 +++++++------- base/modules/serial/psb_d_vect_mod.F90 | 9 +++++---- base/modules/serial/psb_s_base_vect_mod.F90 | 14 +++++++------- base/modules/serial/psb_s_vect_mod.F90 | 9 +++++---- base/modules/serial/psb_z_base_vect_mod.F90 | 14 +++++++------- base/modules/serial/psb_z_vect_mod.F90 | 9 +++++---- base/psblas/psb_caxpby.f90 | 8 ++++---- base/psblas/psb_daxpby.f90 | 8 ++++---- base/psblas/psb_saxpby.f90 | 8 ++++---- base/psblas/psb_zaxpby.f90 | 8 ++++---- base/serial/psi_c_serial_impl.F90 | 8 ++++---- base/serial/psi_d_serial_impl.F90 | 8 ++++---- base/serial/psi_e_serial_impl.F90 | 8 ++++---- base/serial/psi_i2_serial_impl.F90 | 8 ++++---- base/serial/psi_m_serial_impl.F90 | 8 ++++---- base/serial/psi_s_serial_impl.F90 | 8 ++++---- base/serial/psi_z_serial_impl.F90 | 8 ++++---- cuda/psb_c_cuda_vect_mod.F90 | 8 ++++---- cuda/psb_c_vectordev_mod.F90 | 8 ++++---- cuda/psb_d_cuda_vect_mod.F90 | 8 ++++---- cuda/psb_d_vectordev_mod.F90 | 8 ++++---- cuda/psb_s_cuda_vect_mod.F90 | 8 ++++---- cuda/psb_s_vectordev_mod.F90 | 8 ++++---- cuda/psb_z_cuda_vect_mod.F90 | 8 ++++---- cuda/psb_z_vectordev_mod.F90 | 8 ++++---- 38 files changed, 168 insertions(+), 164 deletions(-) diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90 index 38b740a7..3fe001c8 100644 --- a/base/modules/auxil/psi_c_serial_mod.f90 +++ b/base/modules/auxil/psi_c_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_c_serial_mod end subroutine psi_caxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_c_serial_mod complex(psb_spk_), intent (inout) :: z(:) complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_cabgdxyz - end interface psi_abgdxyz + end subroutine psi_c_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90 index 1d65c5f6..a08263df 100644 --- a/base/modules/auxil/psi_d_serial_mod.f90 +++ b/base/modules/auxil/psi_d_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_d_serial_mod end subroutine psi_daxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_d_serial_mod real(psb_dpk_), intent (inout) :: z(:) real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_dabgdxyz - end interface psi_abgdxyz + end subroutine psi_d_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90 index 6f4e8c06..1f1bebd7 100644 --- a/base/modules/auxil/psi_e_serial_mod.f90 +++ b/base/modules/auxil/psi_e_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_e_serial_mod end subroutine psi_eaxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_e_serial_mod integer(psb_epk_), intent (inout) :: z(:) integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_eabgdxyz - end interface psi_abgdxyz + end subroutine psi_e_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90 index ffa14059..770d3256 100644 --- a/base/modules/auxil/psi_i2_serial_mod.f90 +++ b/base/modules/auxil/psi_i2_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_i2_serial_mod end subroutine psi_i2axpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_i2_serial_mod integer(psb_i2pk_), intent (inout) :: z(:) integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_i2abgdxyz - end interface psi_abgdxyz + end subroutine psi_i2_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90 index 5661fdbf..3583cccc 100644 --- a/base/modules/auxil/psi_m_serial_mod.f90 +++ b/base/modules/auxil/psi_m_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_m_serial_mod end subroutine psi_maxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_m_serial_mod integer(psb_mpk_), intent (inout) :: z(:) integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_mabgdxyz - end interface psi_abgdxyz + end subroutine psi_m_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90 index 5cc17d58..3e0c6d91 100644 --- a/base/modules/auxil/psi_s_serial_mod.f90 +++ b/base/modules/auxil/psi_s_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_s_serial_mod end subroutine psi_saxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_spk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_s_serial_mod real(psb_spk_), intent (inout) :: z(:) real(psb_spk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_sabgdxyz - end interface psi_abgdxyz + end subroutine psi_s_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90 index 8a3f053d..a8ea734e 100644 --- a/base/modules/auxil/psi_z_serial_mod.f90 +++ b/base/modules/auxil/psi_z_serial_mod.f90 @@ -99,8 +99,8 @@ module psi_z_serial_mod end subroutine psi_zaxpbyv2 end interface psb_geaxpby - interface psi_abgdxyz - subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + interface psi_upd_xyz + subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) import :: psb_ipk_, psb_dpk_ implicit none integer(psb_ipk_), intent(in) :: m @@ -109,8 +109,8 @@ module psi_z_serial_mod complex(psb_dpk_), intent (inout) :: z(:) complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta integer(psb_ipk_), intent(out) :: info - end subroutine psi_zabgdxyz - end interface psi_abgdxyz + end subroutine psi_z_upd_xyz + end interface psi_upd_xyz interface psi_xyzw subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90 index 7f7f937c..591dec09 100644 --- a/base/modules/psblas/psb_c_psblas_mod.F90 +++ b/base/modules/psblas/psb_c_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_c_psblas_mod end subroutine psb_caxpby end interface - interface psb_abgdxyz - subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_c_vect_type, psb_cspmat_type @@ -154,8 +154,8 @@ module psb_c_psblas_mod complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_cabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_c_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_camax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90 index 12090956..b200bc8a 100644 --- a/base/modules/psblas/psb_d_psblas_mod.F90 +++ b/base/modules/psblas/psb_d_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_d_psblas_mod end subroutine psb_daxpby end interface - interface psb_abgdxyz - subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_d_vect_type, psb_dspmat_type @@ -154,8 +154,8 @@ module psb_d_psblas_mod real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_dabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_d_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_damax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90 index 7a7ce783..a60da025 100644 --- a/base/modules/psblas/psb_s_psblas_mod.F90 +++ b/base/modules/psblas/psb_s_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_s_psblas_mod end subroutine psb_saxpby end interface - interface psb_abgdxyz - subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_spk_, psb_ipk_, & & psb_s_vect_type, psb_sspmat_type @@ -154,8 +154,8 @@ module psb_s_psblas_mod real(psb_spk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_sabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_s_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_samax(x, desc_a, info, jx,global) diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90 index bcfe9caa..241df2b9 100644 --- a/base/modules/psblas/psb_z_psblas_mod.F90 +++ b/base/modules/psblas/psb_z_psblas_mod.F90 @@ -143,8 +143,8 @@ module psb_z_psblas_mod end subroutine psb_zaxpby end interface - interface psb_abgdxyz - subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& + interface psb_upd_xyz + subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) import :: psb_desc_type, psb_dpk_, psb_ipk_, & & psb_z_vect_type, psb_zspmat_type @@ -154,8 +154,8 @@ module psb_z_psblas_mod complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta type(psb_desc_type), intent (in) :: desc_a integer(psb_ipk_), intent(out) :: info - end subroutine psb_zabgdxyz_vect - end interface psb_abgdxyz + end subroutine psb_z_upd_xyz_vect + end interface psb_upd_xyz interface psb_geamax function psb_zamax(x, desc_a, info, jx,global) diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90 index 41bab5ab..4dac86d6 100644 --- a/base/modules/serial/psb_c_base_vect_mod.F90 +++ b/base/modules/serial/psb_c_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_c_base_vect_mod procedure, pass(z) :: axpby_v2 => c_base_axpby_v2 procedure, pass(z) :: axpby_a2 => c_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => c_base_abgdxyz + procedure, pass(z) :: upd_xyz => c_base_upd_xyz procedure, pass(w) :: xyzw => c_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine c_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_c_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=czero)) call x%sync() if (y%is_dev().and.(beta/=czero)) call y%sync() if (z%is_dev().and.(delta/=czero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine c_base_abgdxyz + end subroutine c_base_upd_xyz subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90 index faf1733f..1e9510f2 100644 --- a/base/modules/serial/psb_c_vect_mod.F90 +++ b/base/modules/serial/psb_c_vect_mod.F90 @@ -102,8 +102,9 @@ module psb_c_vect_mod procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => c_vect_abgdxyz + procedure, pass(z) :: upd_xyz => c_vect_upd_xyz procedure, pass(z) :: xyzw => c_vect_xyzw + procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 @@ -773,7 +774,7 @@ contains end subroutine c_vect_axpby_a2 - subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -784,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine c_vect_abgdxyz + end subroutine c_vect_upd_xyz subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90 index 1ad1ffa5..f07b5aed 100644 --- a/base/modules/serial/psb_d_base_vect_mod.F90 +++ b/base/modules/serial/psb_d_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_d_base_vect_mod procedure, pass(z) :: axpby_v2 => d_base_axpby_v2 procedure, pass(z) :: axpby_a2 => d_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => d_base_abgdxyz + procedure, pass(z) :: upd_xyz => d_base_upd_xyz procedure, pass(w) :: xyzw => d_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine d_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_d_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=dzero)) call x%sync() if (y%is_dev().and.(beta/=dzero)) call y%sync() if (z%is_dev().and.(delta/=dzero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine d_base_abgdxyz + end subroutine d_base_upd_xyz subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90 index a54ddfdb..ae3062dd 100644 --- a/base/modules/serial/psb_d_vect_mod.F90 +++ b/base/modules/serial/psb_d_vect_mod.F90 @@ -102,8 +102,9 @@ module psb_d_vect_mod procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => d_vect_abgdxyz + procedure, pass(z) :: upd_xyz => d_vect_upd_xyz procedure, pass(z) :: xyzw => d_vect_xyzw + procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 @@ -780,7 +781,7 @@ contains end subroutine d_vect_axpby_a2 - subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -791,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine d_vect_abgdxyz + end subroutine d_vect_upd_xyz subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90 index 26b82c31..596cd634 100644 --- a/base/modules/serial/psb_s_base_vect_mod.F90 +++ b/base/modules/serial/psb_s_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_s_base_vect_mod procedure, pass(z) :: axpby_v2 => s_base_axpby_v2 procedure, pass(z) :: axpby_a2 => s_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => s_base_abgdxyz + procedure, pass(z) :: upd_xyz => s_base_upd_xyz procedure, pass(w) :: xyzw => s_base_xyzw ! @@ -1137,12 +1137,12 @@ contains end subroutine s_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_s_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1153,7 +1153,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1166,11 +1166,11 @@ contains if (x%is_dev().and.(alpha/=szero)) call x%sync() if (y%is_dev().and.(beta/=szero)) call y%sync() if (z%is_dev().and.(delta/=szero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine s_base_abgdxyz + end subroutine s_base_upd_xyz subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90 index 901c6103..cad4659c 100644 --- a/base/modules/serial/psb_s_vect_mod.F90 +++ b/base/modules/serial/psb_s_vect_mod.F90 @@ -102,8 +102,9 @@ module psb_s_vect_mod procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => s_vect_abgdxyz + procedure, pass(z) :: upd_xyz => s_vect_upd_xyz procedure, pass(z) :: xyzw => s_vect_xyzw + procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 @@ -780,7 +781,7 @@ contains end subroutine s_vect_axpby_a2 - subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -791,9 +792,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine s_vect_abgdxyz + end subroutine s_vect_upd_xyz subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90 index a3afc9c1..1bbdfba1 100644 --- a/base/modules/serial/psb_z_base_vect_mod.F90 +++ b/base/modules/serial/psb_z_base_vect_mod.F90 @@ -155,7 +155,7 @@ module psb_z_base_vect_mod procedure, pass(z) :: axpby_v2 => z_base_axpby_v2 procedure, pass(z) :: axpby_a2 => z_base_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => z_base_abgdxyz + procedure, pass(z) :: upd_xyz => z_base_upd_xyz procedure, pass(w) :: xyzw => z_base_xyzw ! @@ -1130,12 +1130,12 @@ contains end subroutine z_base_axpby_a2 ! - ! ABGDXYZ is invoked via Z, hence the structure below. + ! UPD_XYZ is invoked via Z, hence the structure below. ! ! - !> Function base_abgdxyz + !> Function base_upd_xyz !! \memberof psb_z_base_vect_type - !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta + !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta !! \param m Number of entries to be considered !! \param alpha scalar alpha !! \param beta scalar beta @@ -1146,7 +1146,7 @@ contains !! \param z The class(base_vect) to be added !! \param info return code !! - subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -1159,11 +1159,11 @@ contains if (x%is_dev().and.(alpha/=zzero)) call x%sync() if (y%is_dev().and.(beta/=zzero)) call y%sync() if (z%is_dev().and.(delta/=zzero)) call z%sync() - call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) + call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info) call y%set_host() call z%set_host() - end subroutine z_base_abgdxyz + end subroutine z_base_upd_xyz subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info) use psi_serial_mod diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90 index 8235d1d4..48f2e947 100644 --- a/base/modules/serial/psb_z_vect_mod.F90 +++ b/base/modules/serial/psb_z_vect_mod.F90 @@ -102,8 +102,9 @@ module psb_z_vect_mod procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2 procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 - procedure, pass(z) :: abgdxyz => z_vect_abgdxyz + procedure, pass(z) :: upd_xyz => z_vect_upd_xyz procedure, pass(z) :: xyzw => z_vect_xyzw + procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 @@ -773,7 +774,7 @@ contains end subroutine z_vect_axpby_a2 - subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info) + subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -784,9 +785,9 @@ contains integer(psb_ipk_), intent(out) :: info if (allocated(z%v)) & - call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info) + call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info) - end subroutine z_vect_abgdxyz + end subroutine z_vect_upd_xyz subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info) use psi_serial_mod diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90 index 7c22bb06..5d80ef00 100644 --- a/base/psblas/psb_caxpby.f90 +++ b/base/psblas/psb_caxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info) end subroutine psb_caddconst_vect -subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect implicit none type(psb_c_vect_type), intent (inout) :: x type(psb_c_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_cabgdxyz_vect +end subroutine psb_c_upd_xyz_vect diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90 index 1de77647..38ebe465 100644 --- a/base/psblas/psb_daxpby.f90 +++ b/base/psblas/psb_daxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info) end subroutine psb_daddconst_vect -subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect implicit none type(psb_d_vect_type), intent (inout) :: x type(psb_d_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_dabgdxyz_vect +end subroutine psb_d_upd_xyz_vect diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90 index 1b1f24e6..0055fdbe 100644 --- a/base/psblas/psb_saxpby.f90 +++ b/base/psblas/psb_saxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info) end subroutine psb_saddconst_vect -subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect implicit none type(psb_s_vect_type), intent (inout) :: x type(psb_s_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_sabgdxyz_vect +end subroutine psb_s_upd_xyz_vect diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90 index 0f37a1f4..e93488e3 100644 --- a/base/psblas/psb_zaxpby.f90 +++ b/base/psblas/psb_zaxpby.f90 @@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info) end subroutine psb_zaddconst_vect -subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& +subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,& & desc_a, info) - use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect + use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect implicit none type(psb_z_vect_type), intent (inout) :: x type(psb_z_vect_type), intent (inout) :: y @@ -812,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& end if if(desc_a%get_local_rows() > 0) then - call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info) + call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info) end if call psb_erractionrestore(err_act) @@ -822,5 +822,5 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,& return -end subroutine psb_zabgdxyz_vect +end subroutine psb_z_upd_xyz_vect diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90 index e230a1e0..e3f1d9a3 100644 --- a/base/serial/psi_c_serial_impl.F90 +++ b/base/serial/psi_c_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine caxpbyv2 -subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='cabgdxyz' + name='c_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_cabgdxyz +end subroutine psi_c_upd_xyz subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='cabgdxyz' + name='c_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90 index bf1b2917..d6a9a31d 100644 --- a/base/serial/psi_d_serial_impl.F90 +++ b/base/serial/psi_d_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine daxpbyv2 -subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='dabgdxyz' + name='d_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_dabgdxyz +end subroutine psi_d_upd_xyz subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='dabgdxyz' + name='d_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90 index 911ab4ec..37b11a94 100644 --- a/base/serial/psi_e_serial_impl.F90 +++ b/base/serial/psi_e_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine eaxpbyv2 -subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='eabgdxyz' + name='e_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_eabgdxyz +end subroutine psi_e_upd_xyz subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='eabgdxyz' + name='e_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90 index fb42dfcd..c20cd60b 100644 --- a/base/serial/psi_i2_serial_impl.F90 +++ b/base/serial/psi_i2_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine i2axpbyv2 -subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2abgdxyz' + name='i2_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_i2abgdxyz +end subroutine psi_i2_upd_xyz subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='i2abgdxyz' + name='i2_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90 index 346fd897..55913a16 100644 --- a/base/serial/psi_m_serial_impl.F90 +++ b/base/serial/psi_m_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine maxpbyv2 -subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='mabgdxyz' + name='m_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_mabgdxyz +end subroutine psi_m_upd_xyz subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='mabgdxyz' + name='m_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90 index 52f86bcd..c3846c8e 100644 --- a/base/serial/psi_s_serial_impl.F90 +++ b/base/serial/psi_s_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine saxpbyv2 -subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='sabgdxyz' + name='s_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_sabgdxyz +end subroutine psi_s_upd_xyz subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='sabgdxyz' + name='s_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90 index 7e680273..763eae22 100644 --- a/base/serial/psi_z_serial_impl.F90 +++ b/base/serial/psi_z_serial_impl.F90 @@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info) end subroutine zaxpbyv2 -subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) +subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psb_const_mod use psb_error_mod implicit none @@ -1582,7 +1582,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='zabgdxyz' + name='z_upd_xyz' info = psb_success_ if (m.lt.0) then @@ -1791,7 +1791,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) call fcpsb_serror() return -end subroutine psi_zabgdxyz +end subroutine psi_z_upd_xyz subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) use psb_const_mod @@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info) integer(psb_ipk_) :: i integer(psb_ipk_) :: int_err(5) character name*20 - name='zabgdxyz' + name='z_xyzw' info = psb_success_ if (m.lt.0) then diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90 index 2c2a4f61..45fafe0a 100644 --- a/cuda/psb_c_cuda_vect_mod.F90 +++ b/cuda/psb_c_cuda_vect_mod.F90 @@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod procedure, pass(x) :: dot_a => c_cuda_dot_a procedure, pass(y) :: axpby_v => c_cuda_axpby_v procedure, pass(y) :: axpby_a => c_cuda_axpby_a - procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz procedure, pass(y) :: mlt_v => c_cuda_mlt_v procedure, pass(y) :: mlt_a => c_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine c_cuda_axpby_v - subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx d_cuda_dot_a procedure, pass(y) :: axpby_v => d_cuda_axpby_v procedure, pass(y) :: axpby_a => d_cuda_axpby_a - procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz procedure, pass(y) :: mlt_v => d_cuda_mlt_v procedure, pass(y) :: mlt_a => d_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine d_cuda_axpby_v - subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx s_cuda_dot_a procedure, pass(y) :: axpby_v => s_cuda_axpby_v procedure, pass(y) :: axpby_a => s_cuda_axpby_a - procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz procedure, pass(y) :: mlt_v => s_cuda_mlt_v procedure, pass(y) :: mlt_a => s_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine s_cuda_axpby_v - subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx z_cuda_dot_a procedure, pass(y) :: axpby_v => z_cuda_axpby_v procedure, pass(y) :: axpby_a => z_cuda_axpby_a - procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz + procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz procedure, pass(y) :: mlt_v => z_cuda_mlt_v procedure, pass(y) :: mlt_a => z_cuda_mlt_a procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2 @@ -912,7 +912,7 @@ contains end subroutine z_cuda_axpby_v - subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info) + subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info) use psi_serial_mod implicit none integer(psb_ipk_), intent(in) :: m @@ -946,7 +946,7 @@ contains if ((nx Date: Thu, 8 Aug 2024 15:08:45 +0200 Subject: [PATCH 30/39] Fix missing method in oacc_ell --- openacc/psb_c_oacc_ell_mat_mod.F90 | 12 ++++++++++++ openacc/psb_d_oacc_ell_mat_mod.F90 | 12 ++++++++++++ openacc/psb_s_oacc_ell_mat_mod.F90 | 12 ++++++++++++ openacc/psb_z_oacc_ell_mat_mod.F90 | 12 ++++++++++++ 4 files changed, 48 insertions(+) diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index 102d41c5..5e5dc302 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -303,6 +303,18 @@ contains call tmpa%set_sync() end subroutine c_oacc_ell_sync + subroutine c_oacc_ell_to_dev_scalar(v) + implicit none + complex(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine c_oacc_ell_to_dev_scalar + + subroutine c_oacc_ell_to_dev(v) + implicit none + complex(psb_spk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine c_oacc_ell_to_dev + subroutine c_oacc_ell_to_host(v) implicit none complex(psb_spk_), intent(in) :: v(:,:) diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 042c0ff3..962ad2db 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -303,6 +303,18 @@ contains call tmpa%set_sync() end subroutine d_oacc_ell_sync + subroutine d_oacc_ell_to_dev_scalar(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_ell_to_dev_scalar + + subroutine d_oacc_ell_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine d_oacc_ell_to_dev + subroutine d_oacc_ell_to_host(v) implicit none real(psb_dpk_), intent(in) :: v(:,:) diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index 541fdf9a..9924ba77 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -303,6 +303,18 @@ contains call tmpa%set_sync() end subroutine s_oacc_ell_sync + subroutine s_oacc_ell_to_dev_scalar(v) + implicit none + real(psb_spk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine s_oacc_ell_to_dev_scalar + + subroutine s_oacc_ell_to_dev(v) + implicit none + real(psb_spk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine s_oacc_ell_to_dev + subroutine s_oacc_ell_to_host(v) implicit none real(psb_spk_), intent(in) :: v(:,:) diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index 8bf8c9fa..eb8884d6 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -303,6 +303,18 @@ contains call tmpa%set_sync() end subroutine z_oacc_ell_sync + subroutine z_oacc_ell_to_dev_scalar(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine z_oacc_ell_to_dev_scalar + + subroutine z_oacc_ell_to_dev(v) + implicit none + complex(psb_dpk_), intent(in) :: v(:,:) + !$acc update device(v) + end subroutine z_oacc_ell_to_dev + subroutine z_oacc_ell_to_host(v) implicit none complex(psb_dpk_), intent(in) :: v(:,:) From fa5e7ff9455730ec6e5a07ad5fa6a9018eeac6a3 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 20 Aug 2024 19:38:34 +0200 Subject: [PATCH 31/39] Fixes for vector methods and sync() --- openacc/impl/psb_c_oacc_mlt_v.f90 | 29 ++- openacc/impl/psb_c_oacc_mlt_v_2.f90 | 98 +++++----- openacc/impl/psb_d_oacc_mlt_v.f90 | 29 ++- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 98 +++++----- openacc/impl/psb_s_oacc_mlt_v.f90 | 29 ++- openacc/impl/psb_s_oacc_mlt_v_2.f90 | 98 +++++----- openacc/impl/psb_z_oacc_mlt_v.f90 | 29 ++- openacc/impl/psb_z_oacc_mlt_v_2.f90 | 98 +++++----- openacc/psb_c_oacc_vect_mod.F90 | 279 ++++++++++++++++++---------- openacc/psb_d_oacc_vect_mod.F90 | 279 ++++++++++++++++++---------- openacc/psb_i_oacc_vect_mod.F90 | 54 +++--- openacc/psb_l_oacc_vect_mod.F90 | 54 +++--- openacc/psb_s_oacc_vect_mod.F90 | 279 ++++++++++++++++++---------- openacc/psb_z_oacc_vect_mod.F90 | 279 ++++++++++++++++++---------- 14 files changed, 1082 insertions(+), 650 deletions(-) diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90 index 66c4e865..a366543a 100644 --- a/openacc/impl/psb_c_oacc_mlt_v.f90 +++ b/openacc/impl/psb_c_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ -subroutine c_oacc_mlt_v(x, y, info) - use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v +subroutine psb_c_oacc_mlt_v(x, y, info) + use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v implicit none class(psb_c_base_vect_type), intent(inout) :: x @@ -9,16 +9,19 @@ subroutine c_oacc_mlt_v(x, y, info) integer(psb_ipk_) :: i, n + info = 0 + n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) select type(xx => x) class is (psb_c_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do + call c_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do call y%set_dev() class default if (xx%is_dev()) call xx%sync() @@ -28,4 +31,16 @@ subroutine c_oacc_mlt_v(x, y, info) end do call y%set_host() end select -end subroutine c_oacc_mlt_v +contains + subroutine c_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine c_inner_oacc_mlt_v +end subroutine psb_c_oacc_mlt_v diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90 index a6bb6cc5..f7bceae7 100644 --- a/openacc/impl/psb_c_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ -subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v_2 +subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2 use psb_string_mod implicit none complex(psb_spk_), intent(in) :: alpha, beta @@ -25,33 +25,13 @@ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= czero) .and. (z%is_host())) call z%sync() - if (conjgx_.and.conjgy_) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - - end if + call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() class default if (xx%is_dev()) call xx%sync() if (yy%is_dev()) call yy%sync() if ((beta /= czero) .and. (z%is_dev())) call z%sync() + !call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) if (conjgx_.and.conjgy_) then do i = 1, n z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) @@ -67,7 +47,7 @@ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) else do i = 1, n z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do + end do end if call z%set_host() end select @@ -75,24 +55,56 @@ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= czero) .and. (z%is_dev())) call z%sync() - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - end if + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if call z%set_host() end select -end subroutine c_oacc_mlt_v_2 +contains + subroutine c_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n +complex(psb_spk_), intent(in) :: alpha, beta +complex(psb_spk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + + integer(psb_ipk_) :: i + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine c_inner_oacc_mlt_v_2 +end subroutine psb_c_oacc_mlt_v_2 diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 index bedd0247..dac62a65 100644 --- a/openacc/impl/psb_d_oacc_mlt_v.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ -subroutine d_oacc_mlt_v(x, y, info) - use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v +subroutine psb_d_oacc_mlt_v(x, y, info) + use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v implicit none class(psb_d_base_vect_type), intent(inout) :: x @@ -9,16 +9,19 @@ subroutine d_oacc_mlt_v(x, y, info) integer(psb_ipk_) :: i, n + info = 0 + n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) select type(xx => x) class is (psb_d_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do + call d_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do call y%set_dev() class default if (xx%is_dev()) call xx%sync() @@ -28,4 +31,16 @@ subroutine d_oacc_mlt_v(x, y, info) end do call y%set_host() end select -end subroutine d_oacc_mlt_v +contains + subroutine d_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine d_inner_oacc_mlt_v +end subroutine psb_d_oacc_mlt_v diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index e7dd604f..3f3a457d 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ -subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2 +subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2 use psb_string_mod implicit none real(psb_dpk_), intent(in) :: alpha, beta @@ -25,33 +25,13 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= dzero) .and. (z%is_host())) call z%sync() - if (conjgx_.and.conjgy_) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - - end if + call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() class default if (xx%is_dev()) call xx%sync() if (yy%is_dev()) call yy%sync() if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + !call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) if (conjgx_.and.conjgy_) then do i = 1, n z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) @@ -67,7 +47,7 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) else do i = 1, n z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do + end do end if call z%set_host() end select @@ -75,24 +55,56 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= dzero) .and. (z%is_dev())) call z%sync() - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - end if + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if call z%set_host() end select -end subroutine d_oacc_mlt_v_2 +contains + subroutine d_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n +real(psb_dpk_), intent(in) :: alpha, beta +real(psb_dpk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + + integer(psb_ipk_) :: i + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine d_inner_oacc_mlt_v_2 +end subroutine psb_d_oacc_mlt_v_2 diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90 index fb043cf2..61a1d152 100644 --- a/openacc/impl/psb_s_oacc_mlt_v.f90 +++ b/openacc/impl/psb_s_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ -subroutine s_oacc_mlt_v(x, y, info) - use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v +subroutine psb_s_oacc_mlt_v(x, y, info) + use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v implicit none class(psb_s_base_vect_type), intent(inout) :: x @@ -9,16 +9,19 @@ subroutine s_oacc_mlt_v(x, y, info) integer(psb_ipk_) :: i, n + info = 0 + n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) select type(xx => x) class is (psb_s_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do + call s_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do call y%set_dev() class default if (xx%is_dev()) call xx%sync() @@ -28,4 +31,16 @@ subroutine s_oacc_mlt_v(x, y, info) end do call y%set_host() end select -end subroutine s_oacc_mlt_v +contains + subroutine s_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + real(psb_spk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine s_inner_oacc_mlt_v +end subroutine psb_s_oacc_mlt_v diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90 index 04ee8e09..bcaebfbe 100644 --- a/openacc/impl/psb_s_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ -subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v_2 +subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2 use psb_string_mod implicit none real(psb_spk_), intent(in) :: alpha, beta @@ -25,33 +25,13 @@ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= szero) .and. (z%is_host())) call z%sync() - if (conjgx_.and.conjgy_) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - - end if + call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() class default if (xx%is_dev()) call xx%sync() if (yy%is_dev()) call yy%sync() if ((beta /= szero) .and. (z%is_dev())) call z%sync() + !call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) if (conjgx_.and.conjgy_) then do i = 1, n z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) @@ -67,7 +47,7 @@ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) else do i = 1, n z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do + end do end if call z%set_host() end select @@ -75,24 +55,56 @@ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= szero) .and. (z%is_dev())) call z%sync() - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - end if + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if call z%set_host() end select -end subroutine s_oacc_mlt_v_2 +contains + subroutine s_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n +real(psb_spk_), intent(in) :: alpha, beta +real(psb_spk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + + integer(psb_ipk_) :: i + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine s_inner_oacc_mlt_v_2 +end subroutine psb_s_oacc_mlt_v_2 diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90 index 7018f009..4bc582d2 100644 --- a/openacc/impl/psb_z_oacc_mlt_v.f90 +++ b/openacc/impl/psb_z_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ -subroutine z_oacc_mlt_v(x, y, info) - use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v +subroutine psb_z_oacc_mlt_v(x, y, info) + use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v implicit none class(psb_z_base_vect_type), intent(inout) :: x @@ -9,16 +9,19 @@ subroutine z_oacc_mlt_v(x, y, info) integer(psb_ipk_) :: i, n + info = 0 + n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) select type(xx => x) class is (psb_z_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do + call z_inner_oacc_mlt_v(n,xx%v, y%v) +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do call y%set_dev() class default if (xx%is_dev()) call xx%sync() @@ -28,4 +31,16 @@ subroutine z_oacc_mlt_v(x, y, info) end do call y%set_host() end select -end subroutine z_oacc_mlt_v +contains + subroutine z_inner_oacc_mlt_v(n,x, y) + implicit none + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_), intent(inout) :: x(:), y(:) + + integer(psb_ipk_) :: i + !$acc parallel loop present(x,y) + do i = 1, n + y(i) = (x(i)) * (y(i)) + end do + end subroutine z_inner_oacc_mlt_v +end subroutine psb_z_oacc_mlt_v diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90 index dbc0929c..337a0a96 100644 --- a/openacc/impl/psb_z_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ -subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v_2 +subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2 use psb_string_mod implicit none complex(psb_dpk_), intent(in) :: alpha, beta @@ -25,33 +25,13 @@ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() if ((beta /= zzero) .and. (z%is_host())) call z%sync() - if (conjgx_.and.conjgy_) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - - end if + call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() class default if (xx%is_dev()) call xx%sync() if (yy%is_dev()) call yy%sync() if ((beta /= zzero) .and. (z%is_dev())) call z%sync() + !call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) if (conjgx_.and.conjgy_) then do i = 1, n z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) @@ -67,7 +47,7 @@ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) else do i = 1, n z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do + end do end if call z%set_host() end select @@ -75,24 +55,56 @@ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= zzero) .and. (z%is_dev())) call z%sync() - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) - end do - end if + if (conjgx_.and.conjgy_) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else if (conjgx_.and.(.not.conjgy_)) then + do i = 1, n + z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + else if ((.not.conjgx_).and.(conjgy_)) then + do i = 1, n + z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i) + end do + else + do i = 1, n + z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i) + end do + end if call z%set_host() end select -end subroutine z_oacc_mlt_v_2 +contains + subroutine z_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + integer(psb_ipk_), intent(in) :: n +complex(psb_dpk_), intent(in) :: alpha, beta +complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:) + integer(psb_ipk_), intent(out) :: info + logical, intent(in) :: conjgx, conjgy + + integer(psb_ipk_) :: i + if (conjgx.and.conjgy) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i) + end do + else if (conjgx.and.(.not.conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i) + end do + else if ((.not.conjgx).and.(conjgy)) then + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i) + end do + else + !$acc parallel loop present(x,y,z) + do i = 1, n + z(i) = alpha * (x(i)) * (y(i)) + beta * z(i) + end do + end if + end subroutine z_inner_oacc_mlt_v_2 +end subroutine psb_z_oacc_mlt_v_2 diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index fc501e04..7362ba0e 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -1,5 +1,6 @@ module psb_c_oacc_vect_mod use iso_c_binding + use openacc use psb_const_mod use psb_error_mod use psb_c_vect_mod @@ -50,8 +51,8 @@ module psb_c_oacc_vect_mod procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz procedure, pass(y) :: mlt_a => c_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => c_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => c_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2 procedure, pass(x) :: scal => c_oacc_scal procedure, pass(x) :: nrm2 => c_oacc_nrm2 procedure, pass(x) :: amax => c_oacc_amax @@ -62,17 +63,17 @@ module psb_c_oacc_vect_mod end type psb_c_vect_oacc interface - subroutine c_oacc_mlt_v(x, y, info) + subroutine psb_c_oacc_mlt_v(x, y, info) import implicit none class(psb_c_base_vect_type), intent(inout) :: x class(psb_c_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine c_oacc_mlt_v + end subroutine psb_c_oacc_mlt_v end interface interface - subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import implicit none complex(psb_spk_), intent(in) :: alpha, beta @@ -81,7 +82,7 @@ module psb_c_oacc_vect_mod class(psb_c_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine c_oacc_mlt_v_2 + end subroutine psb_c_oacc_mlt_v_2 end interface contains @@ -89,15 +90,23 @@ contains subroutine c_oacc_absval1(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: n, i + integer(psb_ipk_) :: n - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() n = size(x%v) - !$acc parallel loop - do i = 1, n - x%v(i) = abs(x%v(i)) - end do + call c_inner_oacc_absval1(n,x%v) call x%set_dev() + contains + subroutine c_inner_oacc_absval1(n,x) + implicit none + complex(psb_spk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine c_inner_oacc_absval1 end subroutine c_oacc_absval1 subroutine c_oacc_absval2(x, y) @@ -112,15 +121,23 @@ contains class is (psb_c_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - !$acc parallel loop - do i = 1, n - yy%v(i) = abs(x%v(i)) - end do + call c_inner_oacc_absval2(n,x%v,yy%v) class default if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() call x%psb_c_base_vect_type%absval(y) end select + contains + subroutine c_inner_oacc_absval2(n,x,y) + implicit none + complex(psb_spk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine c_inner_oacc_absval2 end subroutine c_oacc_absval2 subroutine c_oacc_scal(alpha, x) @@ -128,32 +145,46 @@ contains class(psb_c_vect_oacc), intent(inout) :: x complex(psb_spk_), intent(in) :: alpha integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, size(x%v) - x%v(i) = alpha * x%v(i) - end do + if (x%is_host()) call x%sync() + call c_inner_oacc_scal(alpha, x%v) call x%set_dev() + contains + subroutine c_inner_oacc_scal(alpha, x) + complex(psb_spk_), intent(in) :: alpha + complex(psb_spk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine c_inner_oacc_scal end subroutine c_oacc_scal function c_oacc_nrm2(n, x) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_spk_) :: res + real(psb_spk_) :: res + real(psb_spk_) :: mx integer(psb_ipk_) :: info - real(psb_spk_) :: sum - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i))**2 - end do - res = sqrt(sum) + if (x%is_host()) call x%sync() + mx = c_oacc_amax(n,x) + res = c_inner_oacc_nrm2(n, mx, x%v) + contains + function c_inner_oacc_nrm2(n, mx,x) result(res) + integer(psb_ipk_) :: n + complex(psb_spk_) :: x(:) + real(psb_spk_) :: mx, res + real(psb_spk_) :: sum + integer(psb_ipk_) :: i + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end function c_inner_oacc_nrm2 end function c_oacc_nrm2 function c_oacc_amax(n, x) result(res) @@ -162,18 +193,25 @@ contains integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info - real(psb_spk_) :: max_val - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - max_val = -huge(0.0) - !$acc parallel loop reduction(max:max_val) - do i = 1, n - if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) - end do - res = max_val + if (x%is_host()) call x%sync() + res = c_inner_oacc_amax(n, x%v) + contains + function c_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_spk_) :: x(:) + real(psb_spk_) :: res + real(psb_spk_) :: max_val + integer(psb_ipk_) :: i + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function c_inner_oacc_amax end function c_oacc_amax - + function c_oacc_asum(n, x) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -182,14 +220,20 @@ contains integer(psb_ipk_) :: info complex(psb_spk_) :: sum integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i)) - end do - res = sum + if (x%is_host()) call x%sync() + res = c_inner_oacc_asum(n, x%v) + contains + function c_inner_oacc_asum(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_spk_) :: x(:) + real(psb_spk_) :: res + integer(psb_ipk_) :: i + res = 0.0 + !$acc parallel loop reduction(+:res) + do i = 1, n + res = res + abs(x(i)) + end do + end function c_inner_oacc_asum end function c_oacc_asum @@ -201,7 +245,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, size(x) y%v(i) = y%v(i) * x(i) @@ -219,7 +263,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (z%is_dev()) call z%sync_space() + if (z%is_dev()) call z%sync() !$acc parallel loop do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) @@ -282,18 +326,18 @@ contains !!$ class is (psb_c_vect_oacc) !!$ select type (yy => y) !!$ class is (psb_c_vect_oacc) -!!$ if (xx%is_host()) call xx%sync_space() -!!$ if (yy%is_host()) call yy%sync_space() -!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync_space() +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) !!$ end do !!$ call z%set_dev() !!$ class default -!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (xx%is_dev()) call xx%sync() !!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) @@ -303,7 +347,7 @@ contains !!$ class default !!$ if (x%is_dev()) call x%sync() !!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) @@ -327,23 +371,36 @@ contains select type(xx => x) type is (psb_c_vect_oacc) - if ((beta /= czero) .and. y%is_host()) call y%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= czero) .and. y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(y%v) if ((nx < m) .or. (ny < m)) then info = psb_err_internal_error_ else - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * xx%v(i) + beta * y%v(i) - end do + call c_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info) end if call y%set_dev() class default if ((alpha /= czero) .and. (x%is_dev())) call x%sync() call y%axpby(m, alpha, x%v, beta, info) - end select + end select + contains + subroutine c_inner_oacc_axpby(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_spk_), intent(inout) :: x(:) + complex(psb_spk_), intent(inout) :: y(:) + complex(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + !$acc parallel + !$acc loop + do i = 1, m + y(i) = alpha * x(i) + beta * y(i) + end do + !$acc end parallel + end subroutine c_inner_oacc_axpby end subroutine c_oacc_axpby_v subroutine c_oacc_axpby_a(m, alpha, x, beta, y, info) @@ -356,7 +413,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i - if ((beta /= czero) .and. (y%is_dev())) call y%sync_space() + if ((beta /= czero) .and. (y%is_dev())) call y%sync() !$acc parallel loop do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) @@ -375,7 +432,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - + write(0,*)'upd_xyz' info = psb_success_ gpu_done = .false. @@ -385,9 +442,9 @@ contains class is (psb_c_vect_oacc) select type(zz => z) class is (psb_c_vect_oacc) - if ((beta /= czero) .and. yy%is_host()) call yy%sync_space() - if ((delta /= czero) .and. zz%is_host()) call zz%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= czero) .and. yy%is_host()) call yy%sync() + if ((delta /= czero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(yy%v) nz = size(zz%v) @@ -432,8 +489,8 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() - if (y%is_host()) call y%sync_space() + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -459,13 +516,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'c_oacc_sctb_x') return end select - if (y%is_host()) call y%sync_space() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -486,7 +543,7 @@ contains integer(psb_ipk_) :: i if (n == 0) return - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, n @@ -512,13 +569,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'c_oacc_gthzbuf') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -539,13 +596,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'c_oacc_gthzv_x') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -573,9 +630,9 @@ contains type is (psb_i_vect_oacc) select type(vval => val) type is (psb_c_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space() - if (x%is_host()) call x%sync_space() + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n x%v(virl%v(i)) = vval%v(i) @@ -588,11 +645,11 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space() + if (virl%is_dev()) call virl%sync() end select select type(vval => val) type is (psb_c_vect_oacc) - if (vval%is_dev()) call vval%sync_space() + if (vval%is_dev()) call vval%sync() end select call x%ins(n, irl%v, val%v, dupl, info) end if @@ -616,7 +673,7 @@ contains integer(psb_ipk_) :: i info = 0 - if (x%is_dev()) call x%sync_space() + if (x%is_dev()) call x%sync() call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() !$acc update device(x%v) @@ -635,7 +692,10 @@ contains call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine c_oacc_bld_mn @@ -657,7 +717,10 @@ contains x%v(:) = this(:) call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine c_oacc_bld_x @@ -676,13 +739,13 @@ contains if (nd < n) then call x%sync() call x%psb_c_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if else if (size(x%v) < n) then call x%psb_c_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if end if @@ -740,10 +803,9 @@ contains complex(psb_spk_) :: res complex(psb_spk_), external :: ddot integer(psb_ipk_) :: info - integer(psb_ipk_) :: i res = czero - + !write(0,*) 'dot_v' select type(yy => y) type is (psb_c_base_vect_type) if (x%is_dev()) call x%sync() @@ -751,18 +813,26 @@ contains type is (psb_c_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - - !$acc parallel loop reduction(+:res) present(x%v, yy%v) - do i = 1, n - res = res + x%v(i) * yy%v(i) - end do - !$acc end parallel loop - + res = c_inner_oacc_dot(n, x%v, yy%v) class default call x%sync() res = y%dot(n, x%v) end select - + contains + function c_inner_oacc_dot(n, x, y) result(res) + implicit none + complex(psb_spk_), intent(in) :: x(:) + complex(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_spk_) :: res + integer(psb_ipk_) :: i + + !$acc parallel loop reduction(+:res) present(x, y) + do i = 1, n + res = res + x(i) * y(i) + end do + !$acc end parallel loop + end function c_inner_oacc_dot end function c_oacc_vect_dot function c_oacc_dot_a(n, x, y) result(res) @@ -808,7 +878,7 @@ contains implicit none class(psb_c_vect_oacc), intent(inout) :: x if (allocated(x%v)) then - call c_oacc_create_dev(x%v) + if (.not.acc_is_present(x%v)) call c_oacc_create_dev(x%v) end if contains subroutine c_oacc_create_dev(v) @@ -886,6 +956,9 @@ contains call psb_realloc(n, x%v, info) if (info == 0) then call x%set_host() + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if !$acc enter data create(x%v) call x%sync_space() end if @@ -902,7 +975,9 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if deallocate(x%v, stat=info) end if diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index bfb97b5c..9ecbccb4 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -1,5 +1,6 @@ module psb_d_oacc_vect_mod use iso_c_binding + use openacc use psb_const_mod use psb_error_mod use psb_d_vect_mod @@ -50,8 +51,8 @@ module psb_d_oacc_vect_mod procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz procedure, pass(y) :: mlt_a => d_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => d_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 procedure, pass(x) :: scal => d_oacc_scal procedure, pass(x) :: nrm2 => d_oacc_nrm2 procedure, pass(x) :: amax => d_oacc_amax @@ -62,17 +63,17 @@ module psb_d_oacc_vect_mod end type psb_d_vect_oacc interface - subroutine d_oacc_mlt_v(x, y, info) + subroutine psb_d_oacc_mlt_v(x, y, info) import implicit none class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine d_oacc_mlt_v + end subroutine psb_d_oacc_mlt_v end interface interface - subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import implicit none real(psb_dpk_), intent(in) :: alpha, beta @@ -81,7 +82,7 @@ module psb_d_oacc_vect_mod class(psb_d_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine d_oacc_mlt_v_2 + end subroutine psb_d_oacc_mlt_v_2 end interface contains @@ -89,15 +90,23 @@ contains subroutine d_oacc_absval1(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: n, i + integer(psb_ipk_) :: n - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() n = size(x%v) - !$acc parallel loop - do i = 1, n - x%v(i) = abs(x%v(i)) - end do + call d_inner_oacc_absval1(n,x%v) call x%set_dev() + contains + subroutine d_inner_oacc_absval1(n,x) + implicit none + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine d_inner_oacc_absval1 end subroutine d_oacc_absval1 subroutine d_oacc_absval2(x, y) @@ -112,15 +121,23 @@ contains class is (psb_d_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - !$acc parallel loop - do i = 1, n - yy%v(i) = abs(x%v(i)) - end do + call d_inner_oacc_absval2(n,x%v,yy%v) class default if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() call x%psb_d_base_vect_type%absval(y) end select + contains + subroutine d_inner_oacc_absval2(n,x,y) + implicit none + real(psb_dpk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine d_inner_oacc_absval2 end subroutine d_oacc_absval2 subroutine d_oacc_scal(alpha, x) @@ -128,32 +145,46 @@ contains class(psb_d_vect_oacc), intent(inout) :: x real(psb_dpk_), intent(in) :: alpha integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, size(x%v) - x%v(i) = alpha * x%v(i) - end do + if (x%is_host()) call x%sync() + call d_inner_oacc_scal(alpha, x%v) call x%set_dev() + contains + subroutine d_inner_oacc_scal(alpha, x) + real(psb_dpk_), intent(in) :: alpha + real(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine d_inner_oacc_scal end subroutine d_oacc_scal function d_oacc_nrm2(n, x) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res + real(psb_dpk_) :: res + real(psb_dpk_) :: mx integer(psb_ipk_) :: info - real(psb_dpk_) :: sum - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i))**2 - end do - res = sqrt(sum) + if (x%is_host()) call x%sync() + mx = d_oacc_amax(n,x) + res = d_inner_oacc_nrm2(n, mx, x%v) + contains + function d_inner_oacc_nrm2(n, mx,x) result(res) + integer(psb_ipk_) :: n + real(psb_dpk_) :: x(:) + real(psb_dpk_) :: mx, res + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end function d_inner_oacc_nrm2 end function d_oacc_nrm2 function d_oacc_amax(n, x) result(res) @@ -162,18 +193,25 @@ contains integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info - real(psb_dpk_) :: max_val - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - max_val = -huge(0.0) - !$acc parallel loop reduction(max:max_val) - do i = 1, n - if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) - end do - res = max_val + if (x%is_host()) call x%sync() + res = d_inner_oacc_amax(n, x%v) + contains + function d_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function d_inner_oacc_amax end function d_oacc_amax - + function d_oacc_asum(n, x) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -182,14 +220,20 @@ contains integer(psb_ipk_) :: info real(psb_dpk_) :: sum integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i)) - end do - res = sum + if (x%is_host()) call x%sync() + res = d_inner_oacc_asum(n, x%v) + contains + function d_inner_oacc_asum(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + integer(psb_ipk_) :: i + res = 0.0 + !$acc parallel loop reduction(+:res) + do i = 1, n + res = res + abs(x(i)) + end do + end function d_inner_oacc_asum end function d_oacc_asum @@ -201,7 +245,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, size(x) y%v(i) = y%v(i) * x(i) @@ -219,7 +263,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (z%is_dev()) call z%sync_space() + if (z%is_dev()) call z%sync() !$acc parallel loop do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) @@ -282,18 +326,18 @@ contains !!$ class is (psb_d_vect_oacc) !!$ select type (yy => y) !!$ class is (psb_d_vect_oacc) -!!$ if (xx%is_host()) call xx%sync_space() -!!$ if (yy%is_host()) call yy%sync_space() -!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) !!$ end do !!$ call z%set_dev() !!$ class default -!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (xx%is_dev()) call xx%sync() !!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) @@ -303,7 +347,7 @@ contains !!$ class default !!$ if (x%is_dev()) call x%sync() !!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) @@ -327,23 +371,36 @@ contains select type(xx => x) type is (psb_d_vect_oacc) - if ((beta /= dzero) .and. y%is_host()) call y%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= dzero) .and. y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(y%v) if ((nx < m) .or. (ny < m)) then info = psb_err_internal_error_ else - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * xx%v(i) + beta * y%v(i) - end do + call d_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info) end if call y%set_dev() class default if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() call y%axpby(m, alpha, x%v, beta, info) - end select + end select + contains + subroutine d_inner_oacc_axpby(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_dpk_), intent(inout) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + real(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + !$acc parallel + !$acc loop + do i = 1, m + y(i) = alpha * x(i) + beta * y(i) + end do + !$acc end parallel + end subroutine d_inner_oacc_axpby end subroutine d_oacc_axpby_v subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) @@ -356,7 +413,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i - if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() + if ((beta /= dzero) .and. (y%is_dev())) call y%sync() !$acc parallel loop do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) @@ -375,7 +432,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - + write(0,*)'upd_xyz' info = psb_success_ gpu_done = .false. @@ -385,9 +442,9 @@ contains class is (psb_d_vect_oacc) select type(zz => z) class is (psb_d_vect_oacc) - if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() - if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= dzero) .and. yy%is_host()) call yy%sync() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(yy%v) nz = size(zz%v) @@ -432,8 +489,8 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() - if (y%is_host()) call y%sync_space() + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -459,13 +516,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'd_oacc_sctb_x') return end select - if (y%is_host()) call y%sync_space() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -486,7 +543,7 @@ contains integer(psb_ipk_) :: i if (n == 0) return - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, n @@ -512,13 +569,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'd_oacc_gthzbuf') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -539,13 +596,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'd_oacc_gthzv_x') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -573,9 +630,9 @@ contains type is (psb_i_vect_oacc) select type(vval => val) type is (psb_d_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space() - if (x%is_host()) call x%sync_space() + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n x%v(virl%v(i)) = vval%v(i) @@ -588,11 +645,11 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space() + if (virl%is_dev()) call virl%sync() end select select type(vval => val) type is (psb_d_vect_oacc) - if (vval%is_dev()) call vval%sync_space() + if (vval%is_dev()) call vval%sync() end select call x%ins(n, irl%v, val%v, dupl, info) end if @@ -616,7 +673,7 @@ contains integer(psb_ipk_) :: i info = 0 - if (x%is_dev()) call x%sync_space() + if (x%is_dev()) call x%sync() call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() !$acc update device(x%v) @@ -635,7 +692,10 @@ contains call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine d_oacc_bld_mn @@ -657,7 +717,10 @@ contains x%v(:) = this(:) call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine d_oacc_bld_x @@ -676,13 +739,13 @@ contains if (nd < n) then call x%sync() call x%psb_d_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if else if (size(x%v) < n) then call x%psb_d_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if end if @@ -740,10 +803,9 @@ contains real(psb_dpk_) :: res real(psb_dpk_), external :: ddot integer(psb_ipk_) :: info - integer(psb_ipk_) :: i res = dzero - + !write(0,*) 'dot_v' select type(yy => y) type is (psb_d_base_vect_type) if (x%is_dev()) call x%sync() @@ -751,18 +813,26 @@ contains type is (psb_d_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - - !$acc parallel loop reduction(+:res) present(x%v, yy%v) - do i = 1, n - res = res + x%v(i) * yy%v(i) - end do - !$acc end parallel loop - + res = d_inner_oacc_dot(n, x%v, yy%v) class default call x%sync() res = y%dot(n, x%v) end select - + contains + function d_inner_oacc_dot(n, x, y) result(res) + implicit none + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_dpk_) :: res + integer(psb_ipk_) :: i + + !$acc parallel loop reduction(+:res) present(x, y) + do i = 1, n + res = res + x(i) * y(i) + end do + !$acc end parallel loop + end function d_inner_oacc_dot end function d_oacc_vect_dot function d_oacc_dot_a(n, x, y) result(res) @@ -808,7 +878,7 @@ contains implicit none class(psb_d_vect_oacc), intent(inout) :: x if (allocated(x%v)) then - call d_oacc_create_dev(x%v) + if (.not.acc_is_present(x%v)) call d_oacc_create_dev(x%v) end if contains subroutine d_oacc_create_dev(v) @@ -886,6 +956,9 @@ contains call psb_realloc(n, x%v, info) if (info == 0) then call x%set_host() + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if !$acc enter data create(x%v) call x%sync_space() end if @@ -902,7 +975,9 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if deallocate(x%v, stat=info) end if diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 72e9ada2..3dbc48f1 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -1,5 +1,6 @@ module psb_i_oacc_vect_mod use iso_c_binding + use openacc use psb_const_mod use psb_error_mod use psb_i_vect_mod @@ -64,8 +65,8 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() - if (y%is_host()) call y%sync_space() + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -91,13 +92,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'i_oacc_sctb_x') return end select - if (y%is_host()) call y%sync_space() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -118,7 +119,7 @@ contains integer(psb_ipk_) :: i if (n == 0) return - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, n @@ -144,13 +145,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'i_oacc_gthzbuf') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -171,13 +172,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'i_oacc_gthzv_x') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -205,9 +206,9 @@ contains type is (psb_i_vect_oacc) select type(vval => val) type is (psb_i_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space() - if (x%is_host()) call x%sync_space() + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n x%v(virl%v(i)) = vval%v(i) @@ -220,11 +221,11 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space() + if (virl%is_dev()) call virl%sync() end select select type(vval => val) type is (psb_i_vect_oacc) - if (vval%is_dev()) call vval%sync_space() + if (vval%is_dev()) call vval%sync() end select call x%ins(n, irl%v, val%v, dupl, info) end if @@ -248,7 +249,7 @@ contains integer(psb_ipk_) :: i info = 0 - if (x%is_dev()) call x%sync_space() + if (x%is_dev()) call x%sync() call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() !$acc update device(x%v) @@ -267,7 +268,10 @@ contains call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine i_oacc_bld_mn @@ -289,7 +293,10 @@ contains x%v(:) = this(:) call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine i_oacc_bld_x @@ -308,13 +315,13 @@ contains if (nd < n) then call x%sync() call x%psb_i_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if else if (size(x%v) < n) then call x%psb_i_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if end if @@ -393,7 +400,7 @@ contains implicit none class(psb_i_vect_oacc), intent(inout) :: x if (allocated(x%v)) then - call i_oacc_create_dev(x%v) + if (.not.acc_is_present(x%v)) call i_oacc_create_dev(x%v) end if contains subroutine i_oacc_create_dev(v) @@ -471,6 +478,9 @@ contains call psb_realloc(n, x%v, info) if (info == 0) then call x%set_host() + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if !$acc enter data create(x%v) call x%sync_space() end if @@ -487,7 +497,9 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if deallocate(x%v, stat=info) end if diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index aeba4537..cdf28366 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -1,5 +1,6 @@ module psb_l_oacc_vect_mod use iso_c_binding + use openacc use psb_const_mod use psb_error_mod use psb_l_vect_mod @@ -66,8 +67,8 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() - if (y%is_host()) call y%sync_space() + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -93,13 +94,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'l_oacc_sctb_x') return end select - if (y%is_host()) call y%sync_space() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -120,7 +121,7 @@ contains integer(psb_ipk_) :: i if (n == 0) return - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, n @@ -146,13 +147,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'l_oacc_gthzbuf') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -173,13 +174,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'l_oacc_gthzv_x') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -207,9 +208,9 @@ contains type is (psb_i_vect_oacc) select type(vval => val) type is (psb_l_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space() - if (x%is_host()) call x%sync_space() + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n x%v(virl%v(i)) = vval%v(i) @@ -222,11 +223,11 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space() + if (virl%is_dev()) call virl%sync() end select select type(vval => val) type is (psb_l_vect_oacc) - if (vval%is_dev()) call vval%sync_space() + if (vval%is_dev()) call vval%sync() end select call x%ins(n, irl%v, val%v, dupl, info) end if @@ -250,7 +251,7 @@ contains integer(psb_ipk_) :: i info = 0 - if (x%is_dev()) call x%sync_space() + if (x%is_dev()) call x%sync() call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() !$acc update device(x%v) @@ -269,7 +270,10 @@ contains call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine l_oacc_bld_mn @@ -291,7 +295,10 @@ contains x%v(:) = this(:) call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine l_oacc_bld_x @@ -310,13 +317,13 @@ contains if (nd < n) then call x%sync() call x%psb_l_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if else if (size(x%v) < n) then call x%psb_l_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if end if @@ -395,7 +402,7 @@ contains implicit none class(psb_l_vect_oacc), intent(inout) :: x if (allocated(x%v)) then - call l_oacc_create_dev(x%v) + if (.not.acc_is_present(x%v)) call l_oacc_create_dev(x%v) end if contains subroutine l_oacc_create_dev(v) @@ -473,6 +480,9 @@ contains call psb_realloc(n, x%v, info) if (info == 0) then call x%set_host() + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if !$acc enter data create(x%v) call x%sync_space() end if @@ -489,7 +499,9 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if deallocate(x%v, stat=info) end if diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 47922d6a..c3b31af7 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -1,5 +1,6 @@ module psb_s_oacc_vect_mod use iso_c_binding + use openacc use psb_const_mod use psb_error_mod use psb_s_vect_mod @@ -50,8 +51,8 @@ module psb_s_oacc_vect_mod procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz procedure, pass(y) :: mlt_a => s_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => s_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => s_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2 procedure, pass(x) :: scal => s_oacc_scal procedure, pass(x) :: nrm2 => s_oacc_nrm2 procedure, pass(x) :: amax => s_oacc_amax @@ -62,17 +63,17 @@ module psb_s_oacc_vect_mod end type psb_s_vect_oacc interface - subroutine s_oacc_mlt_v(x, y, info) + subroutine psb_s_oacc_mlt_v(x, y, info) import implicit none class(psb_s_base_vect_type), intent(inout) :: x class(psb_s_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine s_oacc_mlt_v + end subroutine psb_s_oacc_mlt_v end interface interface - subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import implicit none real(psb_spk_), intent(in) :: alpha, beta @@ -81,7 +82,7 @@ module psb_s_oacc_vect_mod class(psb_s_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine s_oacc_mlt_v_2 + end subroutine psb_s_oacc_mlt_v_2 end interface contains @@ -89,15 +90,23 @@ contains subroutine s_oacc_absval1(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: n, i + integer(psb_ipk_) :: n - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() n = size(x%v) - !$acc parallel loop - do i = 1, n - x%v(i) = abs(x%v(i)) - end do + call s_inner_oacc_absval1(n,x%v) call x%set_dev() + contains + subroutine s_inner_oacc_absval1(n,x) + implicit none + real(psb_spk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine s_inner_oacc_absval1 end subroutine s_oacc_absval1 subroutine s_oacc_absval2(x, y) @@ -112,15 +121,23 @@ contains class is (psb_s_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - !$acc parallel loop - do i = 1, n - yy%v(i) = abs(x%v(i)) - end do + call s_inner_oacc_absval2(n,x%v,yy%v) class default if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() call x%psb_s_base_vect_type%absval(y) end select + contains + subroutine s_inner_oacc_absval2(n,x,y) + implicit none + real(psb_spk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine s_inner_oacc_absval2 end subroutine s_oacc_absval2 subroutine s_oacc_scal(alpha, x) @@ -128,32 +145,46 @@ contains class(psb_s_vect_oacc), intent(inout) :: x real(psb_spk_), intent(in) :: alpha integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, size(x%v) - x%v(i) = alpha * x%v(i) - end do + if (x%is_host()) call x%sync() + call s_inner_oacc_scal(alpha, x%v) call x%set_dev() + contains + subroutine s_inner_oacc_scal(alpha, x) + real(psb_spk_), intent(in) :: alpha + real(psb_spk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine s_inner_oacc_scal end subroutine s_oacc_scal function s_oacc_nrm2(n, x) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_spk_) :: res + real(psb_spk_) :: res + real(psb_spk_) :: mx integer(psb_ipk_) :: info - real(psb_spk_) :: sum - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i))**2 - end do - res = sqrt(sum) + if (x%is_host()) call x%sync() + mx = s_oacc_amax(n,x) + res = s_inner_oacc_nrm2(n, mx, x%v) + contains + function s_inner_oacc_nrm2(n, mx,x) result(res) + integer(psb_ipk_) :: n + real(psb_spk_) :: x(:) + real(psb_spk_) :: mx, res + real(psb_spk_) :: sum + integer(psb_ipk_) :: i + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end function s_inner_oacc_nrm2 end function s_oacc_nrm2 function s_oacc_amax(n, x) result(res) @@ -162,18 +193,25 @@ contains integer(psb_ipk_), intent(in) :: n real(psb_spk_) :: res integer(psb_ipk_) :: info - real(psb_spk_) :: max_val - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - max_val = -huge(0.0) - !$acc parallel loop reduction(max:max_val) - do i = 1, n - if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) - end do - res = max_val + if (x%is_host()) call x%sync() + res = s_inner_oacc_amax(n, x%v) + contains + function s_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_spk_) :: x(:) + real(psb_spk_) :: res + real(psb_spk_) :: max_val + integer(psb_ipk_) :: i + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function s_inner_oacc_amax end function s_oacc_amax - + function s_oacc_asum(n, x) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -182,14 +220,20 @@ contains integer(psb_ipk_) :: info real(psb_spk_) :: sum integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i)) - end do - res = sum + if (x%is_host()) call x%sync() + res = s_inner_oacc_asum(n, x%v) + contains + function s_inner_oacc_asum(n, x) result(res) + integer(psb_ipk_) :: n + real(psb_spk_) :: x(:) + real(psb_spk_) :: res + integer(psb_ipk_) :: i + res = 0.0 + !$acc parallel loop reduction(+:res) + do i = 1, n + res = res + abs(x(i)) + end do + end function s_inner_oacc_asum end function s_oacc_asum @@ -201,7 +245,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, size(x) y%v(i) = y%v(i) * x(i) @@ -219,7 +263,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (z%is_dev()) call z%sync_space() + if (z%is_dev()) call z%sync() !$acc parallel loop do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) @@ -282,18 +326,18 @@ contains !!$ class is (psb_s_vect_oacc) !!$ select type (yy => y) !!$ class is (psb_s_vect_oacc) -!!$ if (xx%is_host()) call xx%sync_space() -!!$ if (yy%is_host()) call yy%sync_space() -!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync_space() +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) !!$ end do !!$ call z%set_dev() !!$ class default -!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (xx%is_dev()) call xx%sync() !!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) @@ -303,7 +347,7 @@ contains !!$ class default !!$ if (x%is_dev()) call x%sync() !!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) @@ -327,23 +371,36 @@ contains select type(xx => x) type is (psb_s_vect_oacc) - if ((beta /= szero) .and. y%is_host()) call y%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= szero) .and. y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(y%v) if ((nx < m) .or. (ny < m)) then info = psb_err_internal_error_ else - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * xx%v(i) + beta * y%v(i) - end do + call s_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info) end if call y%set_dev() class default if ((alpha /= szero) .and. (x%is_dev())) call x%sync() call y%axpby(m, alpha, x%v, beta, info) - end select + end select + contains + subroutine s_inner_oacc_axpby(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + real(psb_spk_), intent(inout) :: x(:) + real(psb_spk_), intent(inout) :: y(:) + real(psb_spk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + !$acc parallel + !$acc loop + do i = 1, m + y(i) = alpha * x(i) + beta * y(i) + end do + !$acc end parallel + end subroutine s_inner_oacc_axpby end subroutine s_oacc_axpby_v subroutine s_oacc_axpby_a(m, alpha, x, beta, y, info) @@ -356,7 +413,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i - if ((beta /= szero) .and. (y%is_dev())) call y%sync_space() + if ((beta /= szero) .and. (y%is_dev())) call y%sync() !$acc parallel loop do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) @@ -375,7 +432,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - + write(0,*)'upd_xyz' info = psb_success_ gpu_done = .false. @@ -385,9 +442,9 @@ contains class is (psb_s_vect_oacc) select type(zz => z) class is (psb_s_vect_oacc) - if ((beta /= szero) .and. yy%is_host()) call yy%sync_space() - if ((delta /= szero) .and. zz%is_host()) call zz%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= szero) .and. yy%is_host()) call yy%sync() + if ((delta /= szero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(yy%v) nz = size(zz%v) @@ -432,8 +489,8 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() - if (y%is_host()) call y%sync_space() + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -459,13 +516,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 's_oacc_sctb_x') return end select - if (y%is_host()) call y%sync_space() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -486,7 +543,7 @@ contains integer(psb_ipk_) :: i if (n == 0) return - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, n @@ -512,13 +569,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 's_oacc_gthzbuf') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -539,13 +596,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 's_oacc_gthzv_x') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -573,9 +630,9 @@ contains type is (psb_i_vect_oacc) select type(vval => val) type is (psb_s_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space() - if (x%is_host()) call x%sync_space() + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n x%v(virl%v(i)) = vval%v(i) @@ -588,11 +645,11 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space() + if (virl%is_dev()) call virl%sync() end select select type(vval => val) type is (psb_s_vect_oacc) - if (vval%is_dev()) call vval%sync_space() + if (vval%is_dev()) call vval%sync() end select call x%ins(n, irl%v, val%v, dupl, info) end if @@ -616,7 +673,7 @@ contains integer(psb_ipk_) :: i info = 0 - if (x%is_dev()) call x%sync_space() + if (x%is_dev()) call x%sync() call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() !$acc update device(x%v) @@ -635,7 +692,10 @@ contains call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine s_oacc_bld_mn @@ -657,7 +717,10 @@ contains x%v(:) = this(:) call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine s_oacc_bld_x @@ -676,13 +739,13 @@ contains if (nd < n) then call x%sync() call x%psb_s_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if else if (size(x%v) < n) then call x%psb_s_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if end if @@ -740,10 +803,9 @@ contains real(psb_spk_) :: res real(psb_spk_), external :: ddot integer(psb_ipk_) :: info - integer(psb_ipk_) :: i res = szero - + !write(0,*) 'dot_v' select type(yy => y) type is (psb_s_base_vect_type) if (x%is_dev()) call x%sync() @@ -751,18 +813,26 @@ contains type is (psb_s_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - - !$acc parallel loop reduction(+:res) present(x%v, yy%v) - do i = 1, n - res = res + x%v(i) * yy%v(i) - end do - !$acc end parallel loop - + res = s_inner_oacc_dot(n, x%v, yy%v) class default call x%sync() res = y%dot(n, x%v) end select - + contains + function s_inner_oacc_dot(n, x, y) result(res) + implicit none + real(psb_spk_), intent(in) :: x(:) + real(psb_spk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + real(psb_spk_) :: res + integer(psb_ipk_) :: i + + !$acc parallel loop reduction(+:res) present(x, y) + do i = 1, n + res = res + x(i) * y(i) + end do + !$acc end parallel loop + end function s_inner_oacc_dot end function s_oacc_vect_dot function s_oacc_dot_a(n, x, y) result(res) @@ -808,7 +878,7 @@ contains implicit none class(psb_s_vect_oacc), intent(inout) :: x if (allocated(x%v)) then - call s_oacc_create_dev(x%v) + if (.not.acc_is_present(x%v)) call s_oacc_create_dev(x%v) end if contains subroutine s_oacc_create_dev(v) @@ -886,6 +956,9 @@ contains call psb_realloc(n, x%v, info) if (info == 0) then call x%set_host() + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if !$acc enter data create(x%v) call x%sync_space() end if @@ -902,7 +975,9 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if deallocate(x%v, stat=info) end if diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index be03b1cd..bab1a0a0 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -1,5 +1,6 @@ module psb_z_oacc_vect_mod use iso_c_binding + use openacc use psb_const_mod use psb_error_mod use psb_z_vect_mod @@ -50,8 +51,8 @@ module psb_z_oacc_vect_mod procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz procedure, pass(y) :: mlt_a => z_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => z_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => z_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2 procedure, pass(x) :: scal => z_oacc_scal procedure, pass(x) :: nrm2 => z_oacc_nrm2 procedure, pass(x) :: amax => z_oacc_amax @@ -62,17 +63,17 @@ module psb_z_oacc_vect_mod end type psb_z_vect_oacc interface - subroutine z_oacc_mlt_v(x, y, info) + subroutine psb_z_oacc_mlt_v(x, y, info) import implicit none class(psb_z_base_vect_type), intent(inout) :: x class(psb_z_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine z_oacc_mlt_v + end subroutine psb_z_oacc_mlt_v end interface interface - subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) import implicit none complex(psb_dpk_), intent(in) :: alpha, beta @@ -81,7 +82,7 @@ module psb_z_oacc_vect_mod class(psb_z_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine z_oacc_mlt_v_2 + end subroutine psb_z_oacc_mlt_v_2 end interface contains @@ -89,15 +90,23 @@ contains subroutine z_oacc_absval1(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: n, i + integer(psb_ipk_) :: n - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() n = size(x%v) - !$acc parallel loop - do i = 1, n - x%v(i) = abs(x%v(i)) - end do + call z_inner_oacc_absval1(n,x%v) call x%set_dev() + contains + subroutine z_inner_oacc_absval1(n,x) + implicit none + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + x(i) = abs(x(i)) + end do + end subroutine z_inner_oacc_absval1 end subroutine z_oacc_absval1 subroutine z_oacc_absval2(x, y) @@ -112,15 +121,23 @@ contains class is (psb_z_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - !$acc parallel loop - do i = 1, n - yy%v(i) = abs(x%v(i)) - end do + call z_inner_oacc_absval2(n,x%v,yy%v) class default if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() call x%psb_z_base_vect_type%absval(y) end select + contains + subroutine z_inner_oacc_absval2(n,x,y) + implicit none + complex(psb_dpk_), intent(inout) :: x(:),y(:) + integer(psb_ipk_) :: n + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, n + y(i) = abs(x(i)) + end do + end subroutine z_inner_oacc_absval2 end subroutine z_oacc_absval2 subroutine z_oacc_scal(alpha, x) @@ -128,32 +145,46 @@ contains class(psb_z_vect_oacc), intent(inout) :: x complex(psb_dpk_), intent(in) :: alpha integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, size(x%v) - x%v(i) = alpha * x%v(i) - end do + if (x%is_host()) call x%sync() + call z_inner_oacc_scal(alpha, x%v) call x%set_dev() + contains + subroutine z_inner_oacc_scal(alpha, x) + complex(psb_dpk_), intent(in) :: alpha + complex(psb_dpk_), intent(inout) :: x(:) + integer(psb_ipk_) :: i + !$acc parallel loop + do i = 1, size(x) + x(i) = alpha * x(i) + end do + end subroutine z_inner_oacc_scal end subroutine z_oacc_scal function z_oacc_nrm2(n, x) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res + real(psb_dpk_) :: res + real(psb_dpk_) :: mx integer(psb_ipk_) :: info - real(psb_dpk_) :: sum - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i))**2 - end do - res = sqrt(sum) + if (x%is_host()) call x%sync() + mx = z_oacc_amax(n,x) + res = z_inner_oacc_nrm2(n, mx, x%v) + contains + function z_inner_oacc_nrm2(n, mx,x) result(res) + integer(psb_ipk_) :: n + complex(psb_dpk_) :: x(:) + real(psb_dpk_) :: mx, res + real(psb_dpk_) :: sum + integer(psb_ipk_) :: i + sum = 0.0 + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end function z_inner_oacc_nrm2 end function z_oacc_nrm2 function z_oacc_amax(n, x) result(res) @@ -162,18 +193,25 @@ contains integer(psb_ipk_), intent(in) :: n real(psb_dpk_) :: res integer(psb_ipk_) :: info - real(psb_dpk_) :: max_val - integer(psb_ipk_) :: i - if (x%is_host()) call x%sync_space() - max_val = -huge(0.0) - !$acc parallel loop reduction(max:max_val) - do i = 1, n - if (abs(x%v(i)) > max_val) max_val = abs(x%v(i)) - end do - res = max_val + if (x%is_host()) call x%sync() + res = z_inner_oacc_amax(n, x%v) + contains + function z_inner_oacc_amax(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + real(psb_dpk_) :: max_val + integer(psb_ipk_) :: i + max_val = -huge(0.0) + !$acc parallel loop reduction(max:max_val) + do i = 1, n + if (abs(x(i)) > max_val) max_val = abs(x(i)) + end do + res = max_val + end function z_inner_oacc_amax end function z_oacc_amax - + function z_oacc_asum(n, x) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -182,14 +220,20 @@ contains integer(psb_ipk_) :: info complex(psb_dpk_) :: sum integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i)) - end do - res = sum + if (x%is_host()) call x%sync() + res = z_inner_oacc_asum(n, x%v) + contains + function z_inner_oacc_asum(n, x) result(res) + integer(psb_ipk_) :: n + complex(psb_dpk_) :: x(:) + real(psb_dpk_) :: res + integer(psb_ipk_) :: i + res = 0.0 + !$acc parallel loop reduction(+:res) + do i = 1, n + res = res + abs(x(i)) + end do + end function z_inner_oacc_asum end function z_oacc_asum @@ -201,7 +245,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, size(x) y%v(i) = y%v(i) * x(i) @@ -219,7 +263,7 @@ contains integer(psb_ipk_) :: i, n info = 0 - if (z%is_dev()) call z%sync_space() + if (z%is_dev()) call z%sync() !$acc parallel loop do i = 1, size(x) z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) @@ -282,18 +326,18 @@ contains !!$ class is (psb_z_vect_oacc) !!$ select type (yy => y) !!$ class is (psb_z_vect_oacc) -!!$ if (xx%is_host()) call xx%sync_space() -!!$ if (yy%is_host()) call yy%sync_space() -!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync_space() +!!$ if (xx%is_host()) call xx%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) !!$ end do !!$ call z%set_dev() !!$ class default -!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (xx%is_dev()) call xx%sync() !!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) @@ -303,7 +347,7 @@ contains !!$ class default !!$ if (x%is_dev()) call x%sync() !!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space() +!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync() !!$ !$acc parallel loop !!$ do i = 1, n !!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) @@ -327,23 +371,36 @@ contains select type(xx => x) type is (psb_z_vect_oacc) - if ((beta /= zzero) .and. y%is_host()) call y%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= zzero) .and. y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(y%v) if ((nx < m) .or. (ny < m)) then info = psb_err_internal_error_ else - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * xx%v(i) + beta * y%v(i) - end do + call z_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info) end if call y%set_dev() class default if ((alpha /= zzero) .and. (x%is_dev())) call x%sync() call y%axpby(m, alpha, x%v, beta, info) - end select + end select + contains + subroutine z_inner_oacc_axpby(m, alpha, x, beta, y, info) + !use psi_serial_mod + implicit none + integer(psb_ipk_), intent(in) :: m + complex(psb_dpk_), intent(inout) :: x(:) + complex(psb_dpk_), intent(inout) :: y(:) + complex(psb_dpk_), intent(in) :: alpha, beta + integer(psb_ipk_), intent(out) :: info + !$acc parallel + !$acc loop + do i = 1, m + y(i) = alpha * x(i) + beta * y(i) + end do + !$acc end parallel + end subroutine z_inner_oacc_axpby end subroutine z_oacc_axpby_v subroutine z_oacc_axpby_a(m, alpha, x, beta, y, info) @@ -356,7 +413,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: i - if ((beta /= zzero) .and. (y%is_dev())) call y%sync_space() + if ((beta /= zzero) .and. (y%is_dev())) call y%sync() !$acc parallel loop do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) @@ -375,7 +432,7 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - + write(0,*)'upd_xyz' info = psb_success_ gpu_done = .false. @@ -385,9 +442,9 @@ contains class is (psb_z_vect_oacc) select type(zz => z) class is (psb_z_vect_oacc) - if ((beta /= zzero) .and. yy%is_host()) call yy%sync_space() - if ((delta /= zzero) .and. zz%is_host()) call zz%sync_space() - if (xx%is_host()) call xx%sync_space() + if ((beta /= zzero) .and. yy%is_host()) call yy%sync() + if ((delta /= zzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() nx = size(xx%v) ny = size(yy%v) nz = size(zz%v) @@ -432,8 +489,8 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() - if (y%is_host()) call y%sync_space() + if (ii%is_host()) call ii%sync() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -459,13 +516,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'z_oacc_sctb_x') return end select - if (y%is_host()) call y%sync_space() + if (y%is_host()) call y%sync() !$acc parallel loop do i = 1, n @@ -486,7 +543,7 @@ contains integer(psb_ipk_) :: i if (n == 0) return - if (y%is_dev()) call y%sync_space() + if (y%is_dev()) call y%sync() !$acc parallel loop do i = 1, n @@ -512,13 +569,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'z_oacc_gthzbuf') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -539,13 +596,13 @@ contains select type(ii => idx) class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space() + if (ii%is_host()) call ii%sync() class default call psb_errpush(info, 'z_oacc_gthzv_x') return end select - if (x%is_host()) call x%sync_space() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n @@ -573,9 +630,9 @@ contains type is (psb_i_vect_oacc) select type(vval => val) type is (psb_z_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space() - if (x%is_host()) call x%sync_space() + if (vval%is_host()) call vval%sync() + if (virl%is_host()) call virl%sync() + if (x%is_host()) call x%sync() !$acc parallel loop do i = 1, n x%v(virl%v(i)) = vval%v(i) @@ -588,11 +645,11 @@ contains if (.not.done_oacc) then select type(virl => irl) type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space() + if (virl%is_dev()) call virl%sync() end select select type(vval => val) type is (psb_z_vect_oacc) - if (vval%is_dev()) call vval%sync_space() + if (vval%is_dev()) call vval%sync() end select call x%ins(n, irl%v, val%v, dupl, info) end if @@ -616,7 +673,7 @@ contains integer(psb_ipk_) :: i info = 0 - if (x%is_dev()) call x%sync_space() + if (x%is_dev()) call x%sync() call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() !$acc update device(x%v) @@ -635,7 +692,10 @@ contains call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine z_oacc_bld_mn @@ -657,7 +717,10 @@ contains x%v(:) = this(:) call x%set_host() - !$acc update device(x%v) + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if + !$acc enter data copyin(x%v) end subroutine z_oacc_bld_x @@ -676,13 +739,13 @@ contains if (nd < n) then call x%sync() call x%psb_z_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if else if (size(x%v) < n) then call x%psb_z_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() + if (info == psb_success_) call x%sync() call x%set_host() end if end if @@ -740,10 +803,9 @@ contains complex(psb_dpk_) :: res complex(psb_dpk_), external :: ddot integer(psb_ipk_) :: info - integer(psb_ipk_) :: i res = zzero - + !write(0,*) 'dot_v' select type(yy => y) type is (psb_z_base_vect_type) if (x%is_dev()) call x%sync() @@ -751,18 +813,26 @@ contains type is (psb_z_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() - - !$acc parallel loop reduction(+:res) present(x%v, yy%v) - do i = 1, n - res = res + x%v(i) * yy%v(i) - end do - !$acc end parallel loop - + res = z_inner_oacc_dot(n, x%v, yy%v) class default call x%sync() res = y%dot(n, x%v) end select - + contains + function z_inner_oacc_dot(n, x, y) result(res) + implicit none + complex(psb_dpk_), intent(in) :: x(:) + complex(psb_dpk_), intent(in) :: y(:) + integer(psb_ipk_), intent(in) :: n + complex(psb_dpk_) :: res + integer(psb_ipk_) :: i + + !$acc parallel loop reduction(+:res) present(x, y) + do i = 1, n + res = res + x(i) * y(i) + end do + !$acc end parallel loop + end function z_inner_oacc_dot end function z_oacc_vect_dot function z_oacc_dot_a(n, x, y) result(res) @@ -808,7 +878,7 @@ contains implicit none class(psb_z_vect_oacc), intent(inout) :: x if (allocated(x%v)) then - call z_oacc_create_dev(x%v) + if (.not.acc_is_present(x%v)) call z_oacc_create_dev(x%v) end if contains subroutine z_oacc_create_dev(v) @@ -886,6 +956,9 @@ contains call psb_realloc(n, x%v, info) if (info == 0) then call x%set_host() + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if !$acc enter data create(x%v) call x%sync_space() end if @@ -902,7 +975,9 @@ contains integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize + if (acc_is_present(x%v)) then + !$acc exit data delete(x%v) finalize + end if deallocate(x%v, stat=info) end if From 95c546aaddeb4280de1b768d7de4f12accc1c336 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Mon, 26 Aug 2024 08:22:35 +0200 Subject: [PATCH 32/39] Fix OpenACC version of ELL vect_mv --- openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 13 +++++++------ openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 13 +++++++------ openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 13 +++++++------ openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 13 +++++++------ 4 files changed, 28 insertions(+), 24 deletions(-) diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 index e65d00ba..f3e78d98 100644 --- a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 @@ -10,13 +10,13 @@ contains integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nzt, nc info = psb_success_ m = a%get_nrows() n = a%get_ncols() nzt = a%nzt - + nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ @@ -27,14 +27,15 @@ contains if (x%is_host()) call x%sync() if (y%is_host()) call y%sync() - call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() contains - subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) implicit none - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nc complex(psb_spk_), intent(in) :: alpha, beta complex(psb_spk_) :: val(:,:), x(:), y(:) integer(psb_ipk_) :: ja(:,:) @@ -52,7 +53,7 @@ contains do i = ii, ii + isz - 1 tmp = 0.0_psb_dpk_ !$acc loop seq - do j = 1, nzt + do j = 1, nc if (ja(i,j) > 0) then tmp = tmp + val(i,j) * x(ja(i,j)) end if diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 index ebcdb405..4239d049 100644 --- a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 @@ -10,13 +10,13 @@ contains integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nzt, nc info = psb_success_ m = a%get_nrows() n = a%get_ncols() nzt = a%nzt - + nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ @@ -27,14 +27,15 @@ contains if (x%is_host()) call x%sync() if (y%is_host()) call y%sync() - call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() contains - subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) implicit none - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nc real(psb_dpk_), intent(in) :: alpha, beta real(psb_dpk_) :: val(:,:), x(:), y(:) integer(psb_ipk_) :: ja(:,:) @@ -52,7 +53,7 @@ contains do i = ii, ii + isz - 1 tmp = 0.0_psb_dpk_ !$acc loop seq - do j = 1, nzt + do j = 1, nc if (ja(i,j) > 0) then tmp = tmp + val(i,j) * x(ja(i,j)) end if diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 index f48ba041..bbbdd6a3 100644 --- a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 @@ -10,13 +10,13 @@ contains integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nzt, nc info = psb_success_ m = a%get_nrows() n = a%get_ncols() nzt = a%nzt - + nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ @@ -27,14 +27,15 @@ contains if (x%is_host()) call x%sync() if (y%is_host()) call y%sync() - call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() contains - subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) implicit none - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nc real(psb_spk_), intent(in) :: alpha, beta real(psb_spk_) :: val(:,:), x(:), y(:) integer(psb_ipk_) :: ja(:,:) @@ -52,7 +53,7 @@ contains do i = ii, ii + isz - 1 tmp = 0.0_psb_dpk_ !$acc loop seq - do j = 1, nzt + do j = 1, nc if (ja(i,j) > 0) then tmp = tmp + val(i,j) * x(ja(i,j)) end if diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 index ecb61adf..ad8460b8 100644 --- a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 @@ -10,13 +10,13 @@ contains integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nzt, nc info = psb_success_ m = a%get_nrows() n = a%get_ncols() nzt = a%nzt - + nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ @@ -27,14 +27,15 @@ contains if (x%is_host()) call x%sync() if (y%is_host()) call y%sync() - call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() contains - subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) implicit none - integer(psb_ipk_) :: m, n, nzt + integer(psb_ipk_) :: m, n, nc complex(psb_dpk_), intent(in) :: alpha, beta complex(psb_dpk_) :: val(:,:), x(:), y(:) integer(psb_ipk_) :: ja(:,:) @@ -52,7 +53,7 @@ contains do i = ii, ii + isz - 1 tmp = 0.0_psb_dpk_ !$acc loop seq - do j = 1, nzt + do j = 1, nc if (ja(i,j) > 0) then tmp = tmp + val(i,j) * x(ja(i,j)) end if From 479135c62d99319b3c6c1123b45302976f4cceb2 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 29 Aug 2024 16:36:24 +0200 Subject: [PATCH 33/39] Merge some changes from V4 --- openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 | 7 +- openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 | 6 +- openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 | 6 +- openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 | 6 +- openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 4 +- openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 61 +---- openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 6 +- openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 8 +- openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 6 +- openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 72 +----- openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 6 +- openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 6 +- openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 7 +- openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 32 +-- openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 7 +- openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 6 +- openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 6 +- openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 6 +- openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 4 +- openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 61 +---- openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 6 +- openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 8 +- openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 6 +- openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 72 +----- openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 6 +- openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 6 +- openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 7 +- openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 32 +-- openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 | 7 +- openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 | 6 +- openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 | 6 +- openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 | 6 +- openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 4 +- openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 61 +---- openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 6 +- openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 8 +- openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 6 +- openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 72 +----- openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 6 +- openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 6 +- openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 7 +- openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 32 +-- openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 | 7 +- openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 | 6 +- openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 | 6 +- openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 | 6 +- openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 4 +- openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 61 +---- openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 6 +- openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 8 +- openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 6 +- openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 72 +----- openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 6 +- openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 6 +- openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 7 +- openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 32 +-- openacc/psb_c_oacc_csr_mat_mod.F90 | 102 +++----- openacc/psb_c_oacc_ell_mat_mod.F90 | 133 +++------- openacc/psb_c_oacc_hll_mat_mod.F90 | 133 +++------- openacc/psb_c_oacc_vect_mod.F90 | 253 ++++++-------------- openacc/psb_d_oacc_csr_mat_mod.F90 | 102 +++----- openacc/psb_d_oacc_ell_mat_mod.F90 | 133 +++------- openacc/psb_d_oacc_hll_mat_mod.F90 | 133 +++------- openacc/psb_d_oacc_vect_mod.F90 | 253 ++++++-------------- openacc/psb_i_oacc_vect_mod.F90 | 85 ++----- openacc/psb_l_oacc_vect_mod.F90 | 85 ++----- openacc/psb_s_oacc_csr_mat_mod.F90 | 102 +++----- openacc/psb_s_oacc_ell_mat_mod.F90 | 133 +++------- openacc/psb_s_oacc_hll_mat_mod.F90 | 133 +++------- openacc/psb_s_oacc_vect_mod.F90 | 253 ++++++-------------- openacc/psb_z_oacc_csr_mat_mod.F90 | 102 +++----- openacc/psb_z_oacc_ell_mat_mod.F90 | 133 +++------- openacc/psb_z_oacc_hll_mat_mod.F90 | 133 +++------- openacc/psb_z_oacc_vect_mod.F90 | 253 ++++++-------------- 74 files changed, 974 insertions(+), 2612 deletions(-) diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 index 70380c95..a411cc6a 100644 --- a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_c_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 index 7e664791..a8cd93a0 100644 --- a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 index f8c5c39d..30691030 100644 --- a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_c_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 index 7ba971b4..b37011c0 100644 --- a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 index 0fd1ed35..db56d9fc 100644 --- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 index 7bd17478..836874fe 100644 --- a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = czero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_c_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 index 7ee231c5..31d6c4b4 100644 --- a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 index 7e703aa2..1ca43435 100644 --- a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_c_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 index 7d1f790d..95798429 100644 --- a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 index 4c12cdf8..32391cc8 100644 --- a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_c_coo_sparse_mat), intent(in) :: b + class(psb_c_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = czero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_c_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 index af6cc1d5..e442b668 100644 --- a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_c_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 index dec52d40..30d723fe 100644 --- a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_c_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 index f2a064cb..0ac69af8 100644 --- a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_c_oacc_hll_mv_from_fmt end submodule psb_c_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 index 3b74d11a..68141e42 100644 --- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains complex(psb_spk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii complex(psb_spk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = czero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_c_oacc_hll_vect_mv end submodule psb_c_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 index ec92f618..50da7692 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 index 37541ea9..0d35e247 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 index 2ed9b032..97fa07d1 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 index 197ed911..e992f41a 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 index 1dca2ba2..0001cc76 100644 --- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 index c13d1edd..6c24098e 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = dzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_d_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 index b62a40d5..991681e9 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 index 688d182b..9214ba3f 100644 --- a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_d_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 index 8bda6e6e..292165fc 100644 --- a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 index 18bd768b..e39a29b7 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b + class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = dzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_d_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 index fb99737c..a838e31e 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_d_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 index 7bf22c13..29494a39 100644 --- a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_d_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 index e6615365..e03e2f30 100644 --- a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_d_oacc_hll_mv_from_fmt end submodule psb_d_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 index 875b646f..e7c47b7a 100644 --- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains real(psb_dpk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii real(psb_dpk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = dzero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_d_oacc_hll_vect_mv end submodule psb_d_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 index 94ef67b3..3c2fb1cb 100644 --- a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_s_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 index 2c64b5fe..e47959f8 100644 --- a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 index e531d309..fdbf3a0c 100644 --- a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_s_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 index a9dc0c70..a7a581b8 100644 --- a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 index c2bbd6b1..13ee1651 100644 --- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 index 9aaaff73..0f6fbc48 100644 --- a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = szero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_s_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 index d4c1a233..793c2779 100644 --- a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 index d6bbec13..ba82049f 100644 --- a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_s_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 index ebb82901..df789664 100644 --- a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 index 34a0b5d5..dfba3c6c 100644 --- a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_s_coo_sparse_mat), intent(in) :: b + class(psb_s_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - real(psb_spk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = szero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_s_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 index 4d023f8b..849e03b7 100644 --- a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_s_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 index 08b553b7..c22818fa 100644 --- a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_s_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 index d5867289..992b1c7b 100644 --- a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_s_oacc_hll_mv_from_fmt end submodule psb_s_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 index 9d9e9197..efe9a9ca 100644 --- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains real(psb_spk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii real(psb_spk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = szero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_s_oacc_hll_vect_mv end submodule psb_s_oacc_hll_vect_mv_impl diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 index 0485c9ca..6c40d0d2 100644 --- a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 @@ -10,11 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_z_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - - call a%set_dev() - if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 index f2c68816..3025fde2 100644 --- a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_csr_cp_from_fmt diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 index 44b01b68..6dae625a 100644 --- a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_z_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 index bf777e85..1d7dd723 100644 --- a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irp) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_csr_mv_from_fmt diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 index b8da5c8f..cb34dce1 100644 --- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -16,8 +16,8 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 index e4d3b731..6dd60bd7 100644 --- a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 @@ -8,65 +8,14 @@ contains class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza info = psb_success_ - hacksize = 1 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(a%get_nrows(), nz_per_row)) - allocate(a%ja(a%get_nrows(), nz_per_row)) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - end if - a%val = zzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row, row_counts(row) + 1) = value - a%ja(row, row_counts(row) + 1) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(i, j) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - deallocate(row_counts) - - call a%set_dev() + call a%free_space() + call a%psb_z_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 index 98404ae2..60d94bb2 100644 --- a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_ell_cp_from_fmt diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 index 26388e5e..db70b944 100644 --- a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 @@ -9,11 +9,13 @@ contains integer(psb_ipk_), intent(out) :: info info = psb_success_ - + + call a%free_space() call a%psb_z_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 index e0f75828..f99b3817 100644 --- a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_ell_mv_from_fmt diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 index 62be2252..e018e762 100644 --- a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 @@ -2,78 +2,20 @@ submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl use psb_base_mod contains module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info) - implicit none + implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a - class(psb_z_coo_sparse_mat), intent(in) :: b + class(psb_z_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, k, row, col, nz_per_row - complex(psb_dpk_) :: value - integer(psb_ipk_), allocatable :: row_counts(:) - integer(psb_ipk_) :: hacksize, nza - info = psb_success_ - hacksize = 32 ! Assuming a default hack size of 32 - - call a%set_nrows(b%get_nrows()) - call a%set_ncols(b%get_ncols()) - nz_per_row = a%nzt - - if (.not.allocated(a%val)) then - allocate(a%val(nz_per_row * a%get_nrows())) - allocate(a%ja(nz_per_row * a%get_nrows())) - allocate(a%irn(a%get_nrows())) - allocate(a%idiag(a%get_nrows())) - allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) - end if - a%val = zzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - allocate(row_counts(a%get_nrows())) - row_counts = 0 - - nza = b%get_nzeros() - - !$acc parallel loop present(b, a, row_counts) - do k = 1, nza - row = b%ia(k) - col = b%ja(k) - value = b%val(k) - if (row_counts(row) < nz_per_row) then - a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value - a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col - row_counts(row) = row_counts(row) + 1 - else - info = psb_err_invalid_mat_state_ - !goto 9999 - end if - end do - - a%irn = row_counts - - !$acc parallel loop present(a) - do i = 1, a%get_nrows() - do j = 1, nz_per_row - if (a%ja(j + (i - 1) * nz_per_row) == i) then - a%idiag(i) = j - exit - end if - end do - end do - - ! Calculate hkoffs for HLL format - !$acc parallel loop present(a) - do i = 1, size(a%hkoffs) - a%hkoffs(i) = (i - 1) * hacksize - end do - - deallocate(row_counts) - call a%set_dev() + call a%free_space() + call a%psb_z_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 index f267e1c6..7b18b255 100644 --- a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default + call a%free_space() call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select end subroutine psb_z_oacc_hll_cp_from_fmt diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 index 2ff574d3..267f13f3 100644 --- a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,12 @@ contains info = psb_success_ + call a%free_space() call a%psb_z_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() return diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 index 5fa00e38..151dc6ce 100644 --- a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 @@ -14,11 +14,12 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default + call a%free_space() call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - - !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + call a%sync_space() + call a%set_host() + call a%sync() end select - end subroutine psb_z_oacc_hll_mv_from_fmt end submodule psb_z_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 index 89d970c0..dbadf034 100644 --- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -40,28 +40,28 @@ contains complex(psb_dpk_) :: val(:), x(:), y(:) integer(psb_ipk_) :: ja(:), hkoffs(:) integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, idx, k + integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii complex(psb_dpk_) :: tmp info = 0 - - !$acc parallel loop present(val, ja, hkoffs, x, y) + !$acc parallel loop private(nlc, isz,ir,nr) do i = 1, nhacks - do k = 0, hksz - 1 - idx = hkoffs(i) + k - if (idx <= hkoffs(i + 1) - 1) then - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz - if (ja(j) > 0) then - tmp = tmp + val(j) * x(ja(j)) - end if - end do - y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) - end if + isz = hkoffs(i + 1) - hkoffs(i) + nlc = isz/hksz + ir = (i-1)*hksz + nr = min(hksz,m-ir) + !$acc loop independent private(tmp,ii,ipnt) + do ii = 1, nr + ipnt = hkoffs(i) + ii + tmp = zzero + !$acc loop seq + do j = 1, nlc + tmp = tmp + val(ipnt) * x(ja(ipnt)) + ipnt = ipnt + hksz + end do + y(ii+ir) = alpha * tmp + beta * y(ii+ir) end do end do end subroutine inner_spmv - end subroutine psb_z_oacc_hll_vect_mv end submodule psb_z_oacc_hll_vect_mv_impl diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 index 00e79570..07734762 100644 --- a/openacc/psb_c_oacc_csr_mat_mod.F90 +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_c_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_c_mat_mod use psb_c_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_c_oacc_csr_mat_mod procedure, pass(a) :: set_host => c_oacc_csr_set_host procedure, pass(a) :: set_sync => c_oacc_csr_set_sync procedure, pass(a) :: set_dev => c_oacc_csr_set_dev + procedure, pass(a) :: free_space => c_oacc_csr_free_space procedure, pass(a) :: sync_space => c_oacc_csr_sync_space procedure, pass(a) :: sync => c_oacc_csr_sync end type psb_c_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_c_oacc_csr_mat_mod contains - subroutine c_oacc_csr_free(a) + subroutine c_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine c_oacc_csr_free_space + + subroutine c_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_c_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function c_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function c_oacc_csr_get_fmt subroutine c_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_c_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine c_oacc_csr_sync_space(a) implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call c_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine c_oacc_csr_sync_space subroutine c_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call c_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call c_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine c_oacc_csr_sync - subroutine c_oacc_csr_to_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine c_oacc_csr_to_dev - - subroutine c_oacc_csr_to_host(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine c_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_c_oacc_csr_mat_mod diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index 5e5dc302..d23d4d0a 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_c_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_c_mat_mod use psb_c_ell_mat_mod use psb_c_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_c_oacc_ell_mat_mod procedure, pass(a) :: set_dev => c_oacc_ell_set_dev procedure, pass(a) :: sync_space => c_oacc_ell_sync_space procedure, pass(a) :: sync => c_oacc_ell_sync + procedure, pass(a) :: free_space => c_oacc_ell_free_space procedure, pass(a) :: free => c_oacc_ell_free procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_c_oacc_ell_mat_mod contains - subroutine c_oacc_ell_free(a) + subroutine c_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine c_oacc_ell_free_space + + subroutine c_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_c_ell_sparse_mat%free() return end subroutine c_oacc_ell_free - function c_oacc_ell_sizeof(a) result(res) implicit none class(psb_c_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call c_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine c_oacc_ell_sync_space - - function c_oacc_ell_is_host(a) result(res) implicit none class(psb_c_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function c_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function c_oacc_ell_get_fmt subroutine c_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call c_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call c_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine c_oacc_ell_sync - subroutine c_oacc_ell_to_dev_scalar(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine c_oacc_ell_to_dev_scalar - - subroutine c_oacc_ell_to_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine c_oacc_ell_to_dev - - subroutine c_oacc_ell_to_host(v) - implicit none - complex(psb_spk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine c_oacc_ell_to_host - - subroutine c_oacc_ell_to_host_scalar(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine c_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_c_oacc_ell_mat_mod diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 index faad0a1b..2d2095bf 100644 --- a/openacc/psb_c_oacc_hll_mat_mod.F90 +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_c_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_c_mat_mod use psb_c_hll_mat_mod use psb_c_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_c_oacc_hll_mat_mod procedure, pass(a) :: set_dev => c_oacc_hll_set_dev procedure, pass(a) :: sync_space => c_oacc_hll_sync_space procedure, pass(a) :: sync => c_oacc_hll_sync + procedure, pass(a) :: free_space => c_oacc_hll_free_space procedure, pass(a) :: free => c_oacc_hll_free procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_c_oacc_hll_mat_mod contains - subroutine c_oacc_hll_free(a) + subroutine c_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine c_oacc_hll_free_space + + subroutine c_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_c_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function c_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function c_oacc_hll_get_fmt subroutine c_oacc_hll_sync_space(a) implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call c_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine c_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call c_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call c_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine c_oacc_hll_sync - subroutine c_oacc_hll_to_host(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine c_oacc_hll_to_host - - subroutine c_oacc_hll_to_dev(v) - implicit none - complex(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine c_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_c_oacc_hll_mat_mod diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 7362ba0e..79ff0ca3 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_c_oacc_vect_mod procedure, pass(x) :: asum => c_oacc_asum procedure, pass(x) :: absval1 => c_oacc_absval1 procedure, pass(x) :: absval2 => c_oacc_absval2 - + final :: c_oacc_final_vect_free end type psb_c_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_spk_) :: res - real(psb_spk_) :: mx + real(psb_spk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = c_oacc_amax(n,x) - res = c_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = c_inner_oacc_nrm2(n, x%v) contains - function c_inner_oacc_nrm2(n, mx,x) result(res) + function c_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n complex(psb_spk_) :: x(:) - real(psb_spk_) :: mx, res - real(psb_spk_) :: sum + real(psb_spk_) :: res + real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = szero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = szero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_spk_) :: res real(psb_spk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = szero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains complex(psb_spk_) :: x(:) real(psb_spk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = szero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine c_oacc_mlt_a_2 - -!!$ subroutine c_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_c_base_vect_type), intent(inout) :: x -!!$ class(psb_c_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_c_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine c_oacc_mlt_v -!!$ -!!$ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ complex(psb_spk_), intent(in) :: alpha, beta -!!$ class(psb_c_base_vect_type), intent(inout) :: x -!!$ class(psb_c_base_vect_type), intent(inout) :: y -!!$ class(psb_c_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_c_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_c_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine c_oacc_mlt_v_2 - - subroutine c_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= czero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_c_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_c_vect_oacc) + select type(zz => z) class is (psb_c_vect_oacc) - select type(zz => z) - class is (psb_c_vect_oacc) - if ((beta /= czero) .and. yy%is_host()) call yy%sync() - if ((delta /= czero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= czero) .and. yy%is_host()) call yy%sync() + if ((delta /= czero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine c_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine c_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine c_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine c_oacc_bld_x @@ -848,54 +761,21 @@ contains end function c_oacc_dot_a - ! subroutine c_oacc_set_vect(x,y) - ! implicit none - ! class(psb_c_vect_oacc), intent(inout) :: x - ! complex(psb_spk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine c_oacc_set_vect - - subroutine c_oacc_to_dev(v) - implicit none - complex(psb_spk_) :: v(:) - !$acc update device(v) - end subroutine c_oacc_to_dev - - subroutine c_oacc_to_host(v) - implicit none - complex(psb_spk_) :: v(:) - !$acc update self(v) - end subroutine c_oacc_to_host subroutine c_oacc_sync_space(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call c_oacc_create_dev(x%v) - end if - contains - subroutine c_oacc_create_dev(v) - implicit none - complex(psb_spk_) :: v(:) - !$acc enter data copyin(v) - end subroutine c_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine c_oacc_sync_space subroutine c_oacc_sync(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call c_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call c_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine c_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'c_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine c_oacc_vect_all + subroutine c_oacc_final_vect_free(x) + implicit none + type(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine c_oacc_final_vect_free + subroutine c_oacc_vect_free(x, info) implicit none class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine c_oacc_vect_free function c_oacc_get_size(x) result(res) diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index 8b7e111e..74031e89 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_d_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_d_mat_mod use psb_d_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_d_oacc_csr_mat_mod procedure, pass(a) :: set_host => d_oacc_csr_set_host procedure, pass(a) :: set_sync => d_oacc_csr_set_sync procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: free_space => d_oacc_csr_free_space procedure, pass(a) :: sync_space => d_oacc_csr_sync_space procedure, pass(a) :: sync => d_oacc_csr_sync end type psb_d_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_d_oacc_csr_mat_mod contains - subroutine d_oacc_csr_free(a) + subroutine d_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine d_oacc_csr_free_space + + subroutine d_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_d_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function d_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function d_oacc_csr_get_fmt subroutine d_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_d_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine d_oacc_csr_sync_space(a) implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine d_oacc_csr_sync_space subroutine d_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call d_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call d_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine d_oacc_csr_sync - subroutine d_oacc_csr_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_csr_to_dev - - subroutine d_oacc_csr_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_d_oacc_csr_mat_mod diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 962ad2db..45ffc92d 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_d_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_d_mat_mod use psb_d_ell_mat_mod use psb_d_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_d_oacc_ell_mat_mod procedure, pass(a) :: set_dev => d_oacc_ell_set_dev procedure, pass(a) :: sync_space => d_oacc_ell_sync_space procedure, pass(a) :: sync => d_oacc_ell_sync + procedure, pass(a) :: free_space => d_oacc_ell_free_space procedure, pass(a) :: free => d_oacc_ell_free procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_d_oacc_ell_mat_mod contains - subroutine d_oacc_ell_free(a) + subroutine d_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine d_oacc_ell_free_space + + subroutine d_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_d_ell_sparse_mat%free() return end subroutine d_oacc_ell_free - function d_oacc_ell_sizeof(a) result(res) implicit none class(psb_d_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine d_oacc_ell_sync_space - - function d_oacc_ell_is_host(a) result(res) implicit none class(psb_d_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function d_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function d_oacc_ell_get_fmt subroutine d_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call d_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call d_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine d_oacc_ell_sync - subroutine d_oacc_ell_to_dev_scalar(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_ell_to_dev_scalar - - subroutine d_oacc_ell_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine d_oacc_ell_to_dev - - subroutine d_oacc_ell_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host - - subroutine d_oacc_ell_to_host_scalar(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_d_oacc_ell_mat_mod diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index b1c36a65..8009f085 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_d_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_d_mat_mod use psb_d_hll_mat_mod use psb_d_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_d_oacc_hll_mat_mod procedure, pass(a) :: set_dev => d_oacc_hll_set_dev procedure, pass(a) :: sync_space => d_oacc_hll_sync_space procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free_space => d_oacc_hll_free_space procedure, pass(a) :: free => d_oacc_hll_free procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_d_oacc_hll_mat_mod contains - subroutine d_oacc_hll_free(a) + subroutine d_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine d_oacc_hll_free_space + + subroutine d_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_d_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function d_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function d_oacc_hll_get_fmt subroutine d_oacc_hll_sync_space(a) implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine d_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call d_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call d_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine d_oacc_hll_sync - subroutine d_oacc_hll_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_hll_to_host - - subroutine d_oacc_hll_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_d_oacc_hll_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 9ecbccb4..c7804bc1 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_d_oacc_vect_mod procedure, pass(x) :: asum => d_oacc_asum procedure, pass(x) :: absval1 => d_oacc_absval1 procedure, pass(x) :: absval2 => d_oacc_absval2 - + final :: d_oacc_final_vect_free end type psb_d_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_) :: mx + real(psb_dpk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = d_oacc_amax(n,x) - res = d_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = d_inner_oacc_nrm2(n, x%v) contains - function d_inner_oacc_nrm2(n, mx,x) result(res) + function d_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n real(psb_dpk_) :: x(:) - real(psb_dpk_) :: mx, res - real(psb_dpk_) :: sum + real(psb_dpk_) :: res + real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = dzero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = dzero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_dpk_) :: res real(psb_dpk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = dzero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains real(psb_dpk_) :: x(:) real(psb_dpk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = dzero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine d_oacc_mlt_a_2 - -!!$ subroutine d_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_d_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine d_oacc_mlt_v -!!$ -!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ real(psb_dpk_), intent(in) :: alpha, beta -!!$ class(psb_d_base_vect_type), intent(inout) :: x -!!$ class(psb_d_base_vect_type), intent(inout) :: y -!!$ class(psb_d_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_d_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_d_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine d_oacc_mlt_v_2 - - subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= dzero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_d_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_d_vect_oacc) + select type(zz => z) class is (psb_d_vect_oacc) - select type(zz => z) - class is (psb_d_vect_oacc) - if ((beta /= dzero) .and. yy%is_host()) call yy%sync() - if ((delta /= dzero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= dzero) .and. yy%is_host()) call yy%sync() + if ((delta /= dzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine d_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine d_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine d_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine d_oacc_bld_x @@ -848,54 +761,21 @@ contains end function d_oacc_dot_a - ! subroutine d_oacc_set_vect(x,y) - ! implicit none - ! class(psb_d_vect_oacc), intent(inout) :: x - ! real(psb_dpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine d_oacc_set_vect - - subroutine d_oacc_to_dev(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update device(v) - end subroutine d_oacc_to_dev - - subroutine d_oacc_to_host(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update self(v) - end subroutine d_oacc_to_host subroutine d_oacc_sync_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call d_oacc_create_dev(x%v) - end if - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine d_oacc_sync_space subroutine d_oacc_sync(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call d_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call d_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine d_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'd_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine d_oacc_vect_all + subroutine d_oacc_final_vect_free(x) + implicit none + type(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine d_oacc_final_vect_free + subroutine d_oacc_vect_free(x, info) implicit none class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine d_oacc_vect_free function d_oacc_get_size(x) result(res) diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 3dbc48f1..8c813134 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -42,7 +42,7 @@ module psb_i_oacc_vect_mod procedure, pass(x) :: get_size => i_oacc_get_size - + final :: i_oacc_final_vect_free end type psb_i_vect_oacc @@ -252,7 +252,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine i_oacc_ins_a @@ -263,16 +263,14 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine i_oacc_bld_mn @@ -283,6 +281,7 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -290,13 +289,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine i_oacc_bld_x @@ -370,54 +365,21 @@ contains end function i_oacc_get_fmt - ! subroutine i_oacc_set_vect(x,y) - ! implicit none - ! class(psb_i_vect_oacc), intent(inout) :: x - ! integer(psb_ipk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine i_oacc_set_vect - - subroutine i_oacc_to_dev(v) - implicit none - integer(psb_ipk_) :: v(:) - !$acc update device(v) - end subroutine i_oacc_to_dev - - subroutine i_oacc_to_host(v) - implicit none - integer(psb_ipk_) :: v(:) - !$acc update self(v) - end subroutine i_oacc_to_host subroutine i_oacc_sync_space(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call i_oacc_create_dev(x%v) - end if - contains - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine i_oacc_sync_space subroutine i_oacc_sync(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call i_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call i_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine i_oacc_sync @@ -476,33 +438,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'i_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine i_oacc_vect_all + subroutine i_oacc_final_vect_free(x) + implicit none + type(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine i_oacc_final_vect_free + subroutine i_oacc_vect_free(x, info) implicit none class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine i_oacc_vect_free function i_oacc_get_size(x) result(res) diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index cdf28366..9ff100bc 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -44,7 +44,7 @@ module psb_l_oacc_vect_mod procedure, pass(x) :: get_size => l_oacc_get_size - + final :: l_oacc_final_vect_free end type psb_l_vect_oacc @@ -254,7 +254,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine l_oacc_ins_a @@ -265,16 +265,14 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine l_oacc_bld_mn @@ -285,6 +283,7 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -292,13 +291,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine l_oacc_bld_x @@ -372,54 +367,21 @@ contains end function l_oacc_get_fmt - ! subroutine l_oacc_set_vect(x,y) - ! implicit none - ! class(psb_l_vect_oacc), intent(inout) :: x - ! integer(psb_lpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine l_oacc_set_vect - - subroutine l_oacc_to_dev(v) - implicit none - integer(psb_lpk_) :: v(:) - !$acc update device(v) - end subroutine l_oacc_to_dev - - subroutine l_oacc_to_host(v) - implicit none - integer(psb_lpk_) :: v(:) - !$acc update self(v) - end subroutine l_oacc_to_host subroutine l_oacc_sync_space(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call l_oacc_create_dev(x%v) - end if - contains - subroutine l_oacc_create_dev(v) - implicit none - integer(psb_lpk_) :: v(:) - !$acc enter data copyin(v) - end subroutine l_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine l_oacc_sync_space subroutine l_oacc_sync(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call l_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call l_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine l_oacc_sync @@ -478,33 +440,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'l_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine l_oacc_vect_all + subroutine l_oacc_final_vect_free(x) + implicit none + type(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine l_oacc_final_vect_free + subroutine l_oacc_vect_free(x, info) implicit none class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine l_oacc_vect_free function l_oacc_get_size(x) result(res) diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 index 89b10d08..b577daac 100644 --- a/openacc/psb_s_oacc_csr_mat_mod.F90 +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_s_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_s_mat_mod use psb_s_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_s_oacc_csr_mat_mod procedure, pass(a) :: set_host => s_oacc_csr_set_host procedure, pass(a) :: set_sync => s_oacc_csr_set_sync procedure, pass(a) :: set_dev => s_oacc_csr_set_dev + procedure, pass(a) :: free_space => s_oacc_csr_free_space procedure, pass(a) :: sync_space => s_oacc_csr_sync_space procedure, pass(a) :: sync => s_oacc_csr_sync end type psb_s_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_s_oacc_csr_mat_mod contains - subroutine s_oacc_csr_free(a) + subroutine s_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine s_oacc_csr_free_space + + subroutine s_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_s_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function s_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function s_oacc_csr_get_fmt subroutine s_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_s_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine s_oacc_csr_sync_space(a) implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call s_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine s_oacc_csr_sync_space subroutine s_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call s_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call s_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine s_oacc_csr_sync - subroutine s_oacc_csr_to_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine s_oacc_csr_to_dev - - subroutine s_oacc_csr_to_host(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine s_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_s_oacc_csr_mat_mod diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index 9924ba77..793bf353 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_s_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_s_mat_mod use psb_s_ell_mat_mod use psb_s_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_s_oacc_ell_mat_mod procedure, pass(a) :: set_dev => s_oacc_ell_set_dev procedure, pass(a) :: sync_space => s_oacc_ell_sync_space procedure, pass(a) :: sync => s_oacc_ell_sync + procedure, pass(a) :: free_space => s_oacc_ell_free_space procedure, pass(a) :: free => s_oacc_ell_free procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_s_oacc_ell_mat_mod contains - subroutine s_oacc_ell_free(a) + subroutine s_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine s_oacc_ell_free_space + + subroutine s_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_s_ell_sparse_mat%free() return end subroutine s_oacc_ell_free - function s_oacc_ell_sizeof(a) result(res) implicit none class(psb_s_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call s_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine s_oacc_ell_sync_space - - function s_oacc_ell_is_host(a) result(res) implicit none class(psb_s_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function s_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function s_oacc_ell_get_fmt subroutine s_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call s_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call s_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine s_oacc_ell_sync - subroutine s_oacc_ell_to_dev_scalar(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine s_oacc_ell_to_dev_scalar - - subroutine s_oacc_ell_to_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine s_oacc_ell_to_dev - - subroutine s_oacc_ell_to_host(v) - implicit none - real(psb_spk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine s_oacc_ell_to_host - - subroutine s_oacc_ell_to_host_scalar(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine s_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_s_oacc_ell_mat_mod diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 index bf8949a1..508591e4 100644 --- a/openacc/psb_s_oacc_hll_mat_mod.F90 +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_s_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_s_mat_mod use psb_s_hll_mat_mod use psb_s_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_s_oacc_hll_mat_mod procedure, pass(a) :: set_dev => s_oacc_hll_set_dev procedure, pass(a) :: sync_space => s_oacc_hll_sync_space procedure, pass(a) :: sync => s_oacc_hll_sync + procedure, pass(a) :: free_space => s_oacc_hll_free_space procedure, pass(a) :: free => s_oacc_hll_free procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_s_oacc_hll_mat_mod contains - subroutine s_oacc_hll_free(a) + subroutine s_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine s_oacc_hll_free_space + + subroutine s_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_s_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function s_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function s_oacc_hll_get_fmt subroutine s_oacc_hll_sync_space(a) implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call s_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine s_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call s_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call s_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine s_oacc_hll_sync - subroutine s_oacc_hll_to_host(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine s_oacc_hll_to_host - - subroutine s_oacc_hll_to_dev(v) - implicit none - real(psb_spk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine s_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_s_oacc_hll_mat_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index c3b31af7..7ce4292f 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_s_oacc_vect_mod procedure, pass(x) :: asum => s_oacc_asum procedure, pass(x) :: absval1 => s_oacc_absval1 procedure, pass(x) :: absval2 => s_oacc_absval2 - + final :: s_oacc_final_vect_free end type psb_s_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_spk_) :: res - real(psb_spk_) :: mx + real(psb_spk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = s_oacc_amax(n,x) - res = s_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = s_inner_oacc_nrm2(n, x%v) contains - function s_inner_oacc_nrm2(n, mx,x) result(res) + function s_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n real(psb_spk_) :: x(:) - real(psb_spk_) :: mx, res - real(psb_spk_) :: sum + real(psb_spk_) :: res + real(psb_spk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = szero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = szero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_spk_) :: res real(psb_spk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = szero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains real(psb_spk_) :: x(:) real(psb_spk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = szero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine s_oacc_mlt_a_2 - -!!$ subroutine s_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_s_base_vect_type), intent(inout) :: x -!!$ class(psb_s_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_s_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine s_oacc_mlt_v -!!$ -!!$ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ real(psb_spk_), intent(in) :: alpha, beta -!!$ class(psb_s_base_vect_type), intent(inout) :: x -!!$ class(psb_s_base_vect_type), intent(inout) :: y -!!$ class(psb_s_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_s_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_s_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine s_oacc_mlt_v_2 - - subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= szero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_s_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_s_vect_oacc) + select type(zz => z) class is (psb_s_vect_oacc) - select type(zz => z) - class is (psb_s_vect_oacc) - if ((beta /= szero) .and. yy%is_host()) call yy%sync() - if ((delta /= szero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= szero) .and. yy%is_host()) call yy%sync() + if ((delta /= szero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine s_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine s_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine s_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine s_oacc_bld_x @@ -848,54 +761,21 @@ contains end function s_oacc_dot_a - ! subroutine s_oacc_set_vect(x,y) - ! implicit none - ! class(psb_s_vect_oacc), intent(inout) :: x - ! real(psb_spk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine s_oacc_set_vect - - subroutine s_oacc_to_dev(v) - implicit none - real(psb_spk_) :: v(:) - !$acc update device(v) - end subroutine s_oacc_to_dev - - subroutine s_oacc_to_host(v) - implicit none - real(psb_spk_) :: v(:) - !$acc update self(v) - end subroutine s_oacc_to_host subroutine s_oacc_sync_space(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call s_oacc_create_dev(x%v) - end if - contains - subroutine s_oacc_create_dev(v) - implicit none - real(psb_spk_) :: v(:) - !$acc enter data copyin(v) - end subroutine s_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine s_oacc_sync_space subroutine s_oacc_sync(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call s_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call s_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine s_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 's_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine s_oacc_vect_all + subroutine s_oacc_final_vect_free(x) + implicit none + type(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine s_oacc_final_vect_free + subroutine s_oacc_vect_free(x, info) implicit none class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine s_oacc_vect_free function s_oacc_get_size(x) result(res) diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 index 7842d96c..dbafb391 100644 --- a/openacc/psb_z_oacc_csr_mat_mod.F90 +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -1,6 +1,7 @@ module psb_z_oacc_csr_mat_mod use iso_c_binding + use openacc use psb_z_mat_mod use psb_z_oacc_vect_mod !use oaccsparse_mod @@ -35,6 +36,7 @@ module psb_z_oacc_csr_mat_mod procedure, pass(a) :: set_host => z_oacc_csr_set_host procedure, pass(a) :: set_sync => z_oacc_csr_set_sync procedure, pass(a) :: set_dev => z_oacc_csr_set_dev + procedure, pass(a) :: free_space => z_oacc_csr_free_space procedure, pass(a) :: sync_space => z_oacc_csr_sync_space procedure, pass(a) :: sync => z_oacc_csr_sync end type psb_z_oacc_csr_sparse_mat @@ -154,22 +156,26 @@ module psb_z_oacc_csr_mat_mod contains - subroutine z_oacc_csr_free(a) + subroutine z_oacc_csr_free_space(a) use psb_base_mod implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irp)) call acc_delete_finalize(a%irp) + + return + end subroutine z_oacc_csr_free_space + + subroutine z_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_z_csr_sparse_mat%free() return @@ -193,7 +199,7 @@ contains function z_oacc_csr_get_fmt() result(res) implicit none character(len=5) :: res - res = 'CSR_oacc' + res = 'CSROA' end function z_oacc_csr_get_fmt subroutine z_oacc_csr_all(m, n, nz, a, info) @@ -202,19 +208,8 @@ contains class(psb_z_oacc_csr_sparse_mat), intent(out) :: a integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if + info = 0 + call a%free() call a%set_nrows(m) call a%set_ncols(n) @@ -274,26 +269,9 @@ contains subroutine z_oacc_csr_sync_space(a) implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call z_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irp)) call acc_create(a%irp) end subroutine z_oacc_csr_sync_space subroutine z_oacc_csr_sync(a) @@ -304,40 +282,16 @@ contains tmpa => a if (a%is_dev()) then - call z_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irp) else if (a%is_host()) then - call z_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine z_oacc_csr_sync - subroutine z_oacc_csr_to_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine z_oacc_csr_to_dev - - subroutine z_oacc_csr_to_host(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine z_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - end module psb_z_oacc_csr_mat_mod diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index eb8884d6..76b0182d 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_z_oacc_ell_mat_mod use iso_c_binding + use openacc use psb_z_mat_mod use psb_z_ell_mat_mod use psb_z_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_z_oacc_ell_mat_mod procedure, pass(a) :: set_dev => z_oacc_ell_set_dev procedure, pass(a) :: sync_space => z_oacc_ell_sync_space procedure, pass(a) :: sync => z_oacc_ell_sync + procedure, pass(a) :: free_space => z_oacc_ell_free_space procedure, pass(a) :: free => z_oacc_ell_free procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv @@ -152,31 +154,32 @@ module psb_z_oacc_ell_mat_mod contains - subroutine z_oacc_ell_free(a) + subroutine z_oacc_ell_free_space(a) use psb_base_mod implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + + return + end subroutine z_oacc_ell_free_space + + subroutine z_oacc_ell_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_z_ell_sparse_mat%free() return end subroutine z_oacc_ell_free - function z_oacc_ell_sizeof(a) result(res) implicit none class(psb_z_oacc_ell_sparse_mat), intent(in) :: a @@ -196,41 +199,12 @@ contains implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call z_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) end subroutine z_oacc_ell_sync_space - - function z_oacc_ell_is_host(a) result(res) implicit none class(psb_z_oacc_ell_sparse_mat), intent(in) :: a @@ -279,7 +253,7 @@ contains function z_oacc_ell_get_fmt() result(res) implicit none character(len=5) :: res - res = 'ELL_oacc' + res = 'ELLOA' end function z_oacc_ell_get_fmt subroutine z_oacc_ell_sync(a) @@ -290,64 +264,17 @@ contains tmpa => a if (a%is_dev()) then - call z_oacc_ell_to_host(a%val) - call i_oacc_ell_to_host(a%ja) - call i_oacc_ell_to_host_scalar(a%irn) - call i_oacc_ell_to_host_scalar(a%idiag) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) else if (a%is_host()) then - call z_oacc_ell_to_dev(a%val) - call i_oacc_ell_to_dev(a%ja) - call i_oacc_ell_to_dev_scalar(a%irn) - call i_oacc_ell_to_dev_scalar(a%idiag) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine z_oacc_ell_sync - subroutine z_oacc_ell_to_dev_scalar(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine z_oacc_ell_to_dev_scalar - - subroutine z_oacc_ell_to_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine z_oacc_ell_to_dev - - subroutine z_oacc_ell_to_host(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine z_oacc_ell_to_host - - subroutine z_oacc_ell_to_host_scalar(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine z_oacc_ell_to_host_scalar - - subroutine i_oacc_ell_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev - - subroutine i_oacc_ell_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_ell_to_dev_scalar - - subroutine i_oacc_ell_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:,:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host - - subroutine i_oacc_ell_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_ell_to_host_scalar end module psb_z_oacc_ell_mat_mod diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 index e6a4929a..4a657fd1 100644 --- a/openacc/psb_z_oacc_hll_mat_mod.F90 +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -1,5 +1,6 @@ module psb_z_oacc_hll_mat_mod use iso_c_binding + use openacc use psb_z_mat_mod use psb_z_hll_mat_mod use psb_z_oacc_vect_mod @@ -21,6 +22,7 @@ module psb_z_oacc_hll_mat_mod procedure, pass(a) :: set_dev => z_oacc_hll_set_dev procedure, pass(a) :: sync_space => z_oacc_hll_sync_space procedure, pass(a) :: sync => z_oacc_hll_sync + procedure, pass(a) :: free_space => z_oacc_hll_free_space procedure, pass(a) :: free => z_oacc_hll_free procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv @@ -152,28 +154,28 @@ module psb_z_oacc_hll_mat_mod contains - subroutine z_oacc_hll_free(a) + subroutine z_oacc_hll_free_space(a) use psb_base_mod implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irn)) then - !$acc exit data delete(a%irn) - end if - if (allocated(a%idiag)) then - !$acc exit data delete(a%idiag) - end if - if (allocated(a%hkoffs)) then - !$acc exit data delete(a%hkoffs) - end if + if (allocated(a%val)) call acc_delete_finalize(a%val) + if (allocated(a%ja)) call acc_delete_finalize(a%ja) + if (allocated(a%irn)) call acc_delete_finalize(a%irn) + if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) + if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) + + return + end subroutine z_oacc_hll_free_space + + subroutine z_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + call a%free_space() call a%psb_z_hll_sparse_mat%free() return @@ -244,48 +246,18 @@ contains function z_oacc_hll_get_fmt() result(res) implicit none character(len=5) :: res - res = 'HLL_oacc' + res = 'HLLOA' end function z_oacc_hll_get_fmt subroutine z_oacc_hll_sync_space(a) implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call z_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irn)) then - call i_oacc_create_dev_scalar(a%irn) - end if - if (allocated(a%idiag)) then - call i_oacc_create_dev_scalar(a%idiag) - end if - if (allocated(a%hkoffs)) then - call i_oacc_create_dev_scalar(a%hkoffs) - end if - - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev - - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - - subroutine i_oacc_create_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev_scalar - + if (allocated(a%val)) call acc_create(a%val) + if (allocated(a%ja)) call acc_create(a%ja) + if (allocated(a%irn)) call acc_create(a%irn) + if (allocated(a%idiag)) call acc_create(a%idiag) + if (allocated(a%hkoffs)) call acc_create(a%hkoffs) end subroutine z_oacc_hll_sync_space @@ -297,56 +269,19 @@ contains tmpa => a if (a%is_dev()) then - call z_oacc_hll_to_host(a%val) - call i_oacc_hll_to_host(a%ja) - call i_oacc_hll_to_host_scalar(a%irn) - call i_oacc_hll_to_host_scalar(a%idiag) - call i_oacc_hll_to_host_scalar(a%hkoffs) + call acc_update_self(a%val) + call acc_update_self(a%ja) + call acc_update_self(a%irn) + call acc_update_self(a%idiag) + call acc_update_self(a%hkoffs) else if (a%is_host()) then - call z_oacc_hll_to_dev(a%val) - call i_oacc_hll_to_dev(a%ja) - call i_oacc_hll_to_dev_scalar(a%irn) - call i_oacc_hll_to_dev_scalar(a%idiag) - call i_oacc_hll_to_dev_scalar(a%hkoffs) + call acc_update_device(a%val) + call acc_update_device(a%ja) + call acc_update_device(a%irn) + call acc_update_device(a%idiag) + call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine z_oacc_hll_sync - subroutine z_oacc_hll_to_host(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine z_oacc_hll_to_host - - subroutine z_oacc_hll_to_dev(v) - implicit none - complex(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine z_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host - - subroutine i_oacc_hll_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev - - subroutine i_oacc_hll_to_host_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_hll_to_host_scalar - - subroutine i_oacc_hll_to_dev_scalar(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_hll_to_dev_scalar - - end module psb_z_oacc_hll_mat_mod diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index bab1a0a0..5d6f07be 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -59,7 +59,7 @@ module psb_z_oacc_vect_mod procedure, pass(x) :: asum => z_oacc_asum procedure, pass(x) :: absval1 => z_oacc_absval1 procedure, pass(x) :: absval2 => z_oacc_absval2 - + final :: z_oacc_final_vect_free end type psb_z_vect_oacc interface @@ -164,21 +164,25 @@ contains implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_) :: mx + real(psb_dpk_) :: res integer(psb_ipk_) :: info if (x%is_host()) call x%sync() - mx = z_oacc_amax(n,x) - res = z_inner_oacc_nrm2(n, mx, x%v) +!!$ write(0,*)'oacc_nrm2' + res = z_inner_oacc_nrm2(n, x%v) contains - function z_inner_oacc_nrm2(n, mx,x) result(res) + function z_inner_oacc_nrm2(n, x) result(res) integer(psb_ipk_) :: n complex(psb_dpk_) :: x(:) - real(psb_dpk_) :: mx, res - real(psb_dpk_) :: sum + real(psb_dpk_) :: res + real(psb_dpk_) :: sum, mx integer(psb_ipk_) :: i - sum = 0.0 + mx = dzero + !$acc parallel loop reduction(max:mx) + do i = 1, n + if (abs(x(i)) > mx) mx = abs(x(i)) + end do + sum = dzero !$acc parallel loop reduction(+:sum) do i = 1, n sum = sum + abs(x(i)/mx)**2 @@ -203,7 +207,7 @@ contains real(psb_dpk_) :: res real(psb_dpk_) :: max_val integer(psb_ipk_) :: i - max_val = -huge(0.0) + max_val = dzero !$acc parallel loop reduction(max:max_val) do i = 1, n if (abs(x(i)) > max_val) max_val = abs(x(i)) @@ -228,7 +232,7 @@ contains complex(psb_dpk_) :: x(:) real(psb_dpk_) :: res integer(psb_ipk_) :: i - res = 0.0 + res = dzero !$acc parallel loop reduction(+:res) do i = 1, n res = res + abs(x(i)) @@ -271,92 +275,6 @@ contains call z%set_host() end subroutine z_oacc_mlt_a_2 - -!!$ subroutine z_oacc_mlt_v(x, y, info) -!!$ implicit none -!!$ class(psb_z_base_vect_type), intent(inout) :: x -!!$ class(psb_z_vect_oacc), intent(inout) :: y -!!$ integer(psb_ipk_), intent(out) :: info -!!$ -!!$ integer(psb_ipk_) :: i, n -!!$ -!!$ info = 0 -!!$ n = min(x%get_nrows(), y%get_nrows()) -!!$ select type(xx => x) -!!$ type is (psb_z_base_vect_type) -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ y%v(i) = y%v(i) * xx%v(i) -!!$ end do -!!$ call y%set_host() -!!$ end select -!!$ end subroutine z_oacc_mlt_v -!!$ -!!$ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) -!!$ use psi_serial_mod -!!$ use psb_string_mod -!!$ implicit none -!!$ complex(psb_dpk_), intent(in) :: alpha, beta -!!$ class(psb_z_base_vect_type), intent(inout) :: x -!!$ class(psb_z_base_vect_type), intent(inout) :: y -!!$ class(psb_z_vect_oacc), intent(inout) :: z -!!$ integer(psb_ipk_), intent(out) :: info -!!$ character(len=1), intent(in), optional :: conjgx, conjgy -!!$ integer(psb_ipk_) :: i, n -!!$ logical :: conjgx_, conjgy_ -!!$ -!!$ conjgx_ = .false. -!!$ conjgy_ = .false. -!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') -!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') -!!$ -!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) -!!$ -!!$ info = 0 -!!$ select type(xx => x) -!!$ class is (psb_z_vect_oacc) -!!$ select type (yy => y) -!!$ class is (psb_z_vect_oacc) -!!$ if (xx%is_host()) call xx%sync() -!!$ if (yy%is_host()) call yy%sync() -!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_dev() -!!$ class default -!!$ if (xx%is_dev()) call xx%sync() -!!$ if (yy%is_dev()) call yy%sync() -!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ class default -!!$ if (x%is_dev()) call x%sync() -!!$ if (y%is_dev()) call y%sync() -!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync() -!!$ !$acc parallel loop -!!$ do i = 1, n -!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) -!!$ end do -!!$ call z%set_host() -!!$ end select -!!$ end subroutine z_oacc_mlt_v_2 - - subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info) !use psi_serial_mod implicit none @@ -414,7 +332,7 @@ contains integer(psb_ipk_) :: i if ((beta /= zzero) .and. (y%is_dev())) call y%sync() - !$acc parallel loop + do i = 1, m y%v(i) = alpha * x(i) + beta * y%v(i) end do @@ -432,44 +350,44 @@ contains integer(psb_ipk_), intent(out) :: info integer(psb_ipk_) :: nx, ny, nz, i logical :: gpu_done - write(0,*)'upd_xyz' + info = psb_success_ gpu_done = .false. select type(xx => x) class is (psb_z_vect_oacc) - select type(yy => y) + select type(yy => y) + class is (psb_z_vect_oacc) + select type(zz => z) class is (psb_z_vect_oacc) - select type(zz => z) - class is (psb_z_vect_oacc) - if ((beta /= zzero) .and. yy%is_host()) call yy%sync() - if ((delta /= zzero) .and. zz%is_host()) call zz%sync() - if (xx%is_host()) call xx%sync() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select + if ((beta /= zzero) .and. yy%is_host()) call yy%sync() + if ((delta /= zzero) .and. zz%is_host()) call zz%sync() + if (xx%is_host()) call xx%sync() + nx = size(xx%v) + ny = size(yy%v) + nz = size(zz%v) + if ((nx < m) .or. (ny < m) .or. (nz < m)) then + info = psb_err_internal_error_ + else + !$acc parallel loop + do i = 1, m + yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) + zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) + end do + end if + call yy%set_dev() + call zz%set_dev() + gpu_done = .true. end select + end select end select if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + if (z%is_host()) call z%sync() + call y%axpby(m, alpha, x, beta, info) + call z%axpby(m, gamma, y, delta, info) end if end subroutine z_oacc_upd_xyz @@ -676,7 +594,7 @@ contains if (x%is_dev()) call x%sync() call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info) call x%set_host() - !$acc update device(x%v) + end subroutine z_oacc_ins_a @@ -687,16 +605,14 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call x%all(n, info) if (info /= 0) then call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) - + call x%sync_space() + end subroutine z_oacc_bld_mn @@ -707,6 +623,7 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_) :: info + call x%free(info) call psb_realloc(size(this), x%v, info) if (info /= 0) then info = psb_err_alloc_request_ @@ -714,13 +631,9 @@ contains i_err=(/size(this), izero, izero, izero, izero/)) return end if - x%v(:) = this(:) call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data copyin(x%v) + call x%sync_space() end subroutine z_oacc_bld_x @@ -848,54 +761,21 @@ contains end function z_oacc_dot_a - ! subroutine z_oacc_set_vect(x,y) - ! implicit none - ! class(psb_z_vect_oacc), intent(inout) :: x - ! complex(psb_dpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine z_oacc_set_vect - - subroutine z_oacc_to_dev(v) - implicit none - complex(psb_dpk_) :: v(:) - !$acc update device(v) - end subroutine z_oacc_to_dev - - subroutine z_oacc_to_host(v) - implicit none - complex(psb_dpk_) :: v(:) - !$acc update self(v) - end subroutine z_oacc_to_host subroutine z_oacc_sync_space(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x - if (allocated(x%v)) then - if (.not.acc_is_present(x%v)) call z_oacc_create_dev(x%v) - end if - contains - subroutine z_oacc_create_dev(v) - implicit none - complex(psb_dpk_) :: v(:) - !$acc enter data copyin(v) - end subroutine z_oacc_create_dev + if (allocated(x%v)) call acc_create(x%v) end subroutine z_oacc_sync_space subroutine z_oacc_sync(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x if (x%is_dev()) then - call z_oacc_to_host(x%v) + call acc_update_self(x%v) end if if (x%is_host()) then - call z_oacc_to_dev(x%v) + call acc_update_device(x%v) end if call x%set_sync() end subroutine z_oacc_sync @@ -954,33 +834,36 @@ contains integer(psb_ipk_), intent(out) :: info call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if - !$acc enter data create(x%v) - call x%sync_space() - end if if (info /= 0) then info = psb_err_alloc_request_ call psb_errpush(info, 'z_oacc_all', & i_err=(/n, n, n, n, n/)) end if + call x%set_host() + call x%sync_space() end subroutine z_oacc_vect_all + subroutine z_oacc_final_vect_free(x) + implicit none + type(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_) :: info + info = 0 + if (allocated(x%v)) then + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + deallocate(x%v, stat=info) + end if + + end subroutine z_oacc_final_vect_free + subroutine z_oacc_vect_free(x, info) implicit none class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 if (allocated(x%v)) then - if (acc_is_present(x%v)) then - !$acc exit data delete(x%v) finalize - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) deallocate(x%v, stat=info) end if - end subroutine z_oacc_vect_free function z_oacc_get_size(x) result(res) From fbb974fb8b6e71cd7970e2101b0adf6cca32a724 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 30 Aug 2024 10:20:58 +0200 Subject: [PATCH 34/39] Change name sync|free space, unify allocate impl --- openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 | 10 +- openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 | 4 +- openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 | 4 +- openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 | 4 +- openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 | 4 +- openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 | 4 +- openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 | 4 +- openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 | 16 +-- openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 4 +- openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 4 +- openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 | 4 +- openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 4 +- openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 4 +- openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 | 4 +- openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 | 21 +-- openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 4 +- openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 4 +- openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 | 4 +- openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 4 +- openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 4 +- openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 | 4 +- openacc/impl/psb_c_oacc_mlt_v.f90 | 4 +- openacc/impl/psb_c_oacc_mlt_v_2.f90 | 6 +- openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 10 +- openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 4 +- openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 4 +- openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 4 +- openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 4 +- openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 4 +- openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 4 +- openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 16 +-- openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 4 +- openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 4 +- openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 | 4 +- openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 4 +- openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 4 +- openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 | 4 +- openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 21 +-- openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 4 +- openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 4 +- openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 | 4 +- openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 4 +- openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 4 +- openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 | 4 +- openacc/impl/psb_d_oacc_mlt_v.f90 | 4 +- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 6 +- openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 | 10 +- openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 | 4 +- openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 | 4 +- openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 | 4 +- openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 | 4 +- openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 | 4 +- openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 | 4 +- openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 | 16 +-- openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 4 +- openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 4 +- openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 | 4 +- openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 4 +- openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 4 +- openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 | 4 +- openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 | 21 +-- openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 4 +- openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 4 +- openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 | 4 +- openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 4 +- openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 4 +- openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 | 4 +- openacc/impl/psb_s_oacc_mlt_v.f90 | 4 +- openacc/impl/psb_s_oacc_mlt_v_2.f90 | 6 +- openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 | 10 +- openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 | 4 +- openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 | 4 +- openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 | 4 +- openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 | 4 +- openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 | 4 +- openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 | 4 +- openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 | 16 +-- openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 4 +- openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 4 +- openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 | 4 +- openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 4 +- openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 4 +- openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 | 4 +- openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 | 21 +-- openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 4 +- openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 4 +- openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 | 4 +- openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 4 +- openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 4 +- openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 | 4 +- openacc/impl/psb_z_oacc_mlt_v.f90 | 4 +- openacc/impl/psb_z_oacc_mlt_v_2.f90 | 6 +- openacc/psb_c_oacc_csr_mat_mod.F90 | 66 ++++----- openacc/psb_c_oacc_ell_mat_mod.F90 | 62 ++++----- openacc/psb_c_oacc_hll_mat_mod.F90 | 62 ++++----- openacc/psb_c_oacc_vect_mod.F90 | 126 +++++++++--------- openacc/psb_d_oacc_csr_mat_mod.F90 | 66 ++++----- openacc/psb_d_oacc_ell_mat_mod.F90 | 62 ++++----- openacc/psb_d_oacc_hll_mat_mod.F90 | 62 ++++----- openacc/psb_d_oacc_vect_mod.F90 | 126 +++++++++--------- openacc/psb_i_oacc_vect_mod.F90 | 82 ++++++------ openacc/psb_l_oacc_vect_mod.F90 | 82 ++++++------ openacc/psb_s_oacc_csr_mat_mod.F90 | 66 ++++----- openacc/psb_s_oacc_ell_mat_mod.F90 | 62 ++++----- openacc/psb_s_oacc_hll_mat_mod.F90 | 62 ++++----- openacc/psb_s_oacc_vect_mod.F90 | 126 +++++++++--------- openacc/psb_z_oacc_csr_mat_mod.F90 | 66 ++++----- openacc/psb_z_oacc_ell_mat_mod.F90 | 62 ++++----- openacc/psb_z_oacc_hll_mat_mod.F90 | 62 ++++----- openacc/psb_z_oacc_vect_mod.F90 | 126 +++++++++--------- 110 files changed, 902 insertions(+), 1042 deletions(-) diff --git a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 index 09cdc228..a6fe9b9c 100644 --- a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 +++ b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 @@ -15,14 +15,8 @@ contains info = psb_success_ call a%psb_c_csr_sparse_mat%allocate(m, n, nz) - - if (.not.allocated(a%val)) then - allocate(a%val(nz)) - allocate(a%ja(nz)) - allocate(a%irp(m+1)) - end if - - call a%set_dev() + call a%set_host() + call a%sync_dev_space() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 index a411cc6a..ab5f514b 100644 --- a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_c_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 index a8cd93a0..c6287cbb 100644 --- a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 index 1dee9f2e..833140f4 100644 --- a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_c_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_c_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 index 30691030..73f02b84 100644 --- a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_c_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 index b37011c0..853a3269 100644 --- a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 index 92a53116..e7dc970a 100644 --- a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 +++ b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_c_csr_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 index 594f3e88..5a363434 100644 --- a/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 +++ b/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 @@ -21,20 +21,8 @@ contains end if call a%psb_c_ell_sparse_mat%allocate(m, n, nz_) - - if (.not.allocated(a%val)) then - allocate(a%val(m, nz_)) - allocate(a%ja(m, nz_)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - end if - - a%val = czero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 index 836874fe..47118e48 100644 --- a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_c_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 index 31d6c4b4..559cb65c 100644 --- a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 index df642ee8..cfabcb78 100644 --- a/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 +++ b/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_c_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_c_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 index 1ca43435..183be9a5 100644 --- a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_c_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 index 95798429..d1bd6330 100644 --- a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 index 9f21c5df..24d153f6 100644 --- a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 +++ b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_c_ell_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 index 0840d0d6..c90fc652 100644 --- a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 +++ b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 @@ -22,25 +22,8 @@ contains end if call a%psb_c_hll_sparse_mat%allocate(m, n, nz_) - - hksz = a%hksz - nhacks = (m + hksz - 1) / hksz - - if (.not.allocated(a%val)) then - allocate(a%val(nz_ * m)) - allocate(a%ja(nz_ * m)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - allocate(a%hkoffs(nhacks)) - end if - - a%val = czero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - a%hkoffs = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 index 32391cc8..0eaebf9d 100644 --- a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_c_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 index e442b668..29b18470 100644 --- a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_c_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 index a8d486b2..4b45f518 100644 --- a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 +++ b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_c_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_c_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 index 30d723fe..193f9753 100644 --- a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_c_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 index 0ac69af8..1c928067 100644 --- a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_c_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 index 52983d4e..9290c381 100644 --- a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 +++ b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 @@ -15,8 +15,8 @@ contains info = psb_success_ call a%psb_c_hll_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90 index a366543a..e5f215be 100644 --- a/openacc/impl/psb_c_oacc_mlt_v.f90 +++ b/openacc/impl/psb_c_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ subroutine psb_c_oacc_mlt_v(x, y, info) - use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v + use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v implicit none class(psb_c_base_vect_type), intent(inout) :: x @@ -13,7 +13,7 @@ subroutine psb_c_oacc_mlt_v(x, y, info) n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) + select type(xx => x) class is (psb_c_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90 index f7bceae7..b47cd1ae 100644 --- a/openacc/impl/psb_c_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2 + use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2 use psb_string_mod implicit none complex(psb_spk_), intent(in) :: alpha, beta @@ -18,9 +18,9 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) info = 0 - select type(xx => x) + select type(xx => x) class is (psb_c_vect_oacc) - select type (yy => y) + select type (yy => y) class is (psb_c_vect_oacc) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 index 5281edee..1f210a09 100644 --- a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 @@ -15,14 +15,8 @@ contains info = psb_success_ call a%psb_d_csr_sparse_mat%allocate(m, n, nz) - - if (.not.allocated(a%val)) then - allocate(a%val(nz)) - allocate(a%ja(nz)) - allocate(a%irp(m+1)) - end if - - call a%set_dev() + call a%set_host() + call a%sync_dev_space() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 index 50da7692..92770e0f 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 index 0d35e247..9da6d861 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 index 5f0ef4a0..c9a875bc 100644 --- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_d_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_d_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 index 97fa07d1..0f020e06 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 index e992f41a..cfd7d7dd 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 index c345f681..8f746105 100644 --- a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 +++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_d_csr_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 index b46c5454..ca0a0f84 100644 --- a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 @@ -21,20 +21,8 @@ contains end if call a%psb_d_ell_sparse_mat%allocate(m, n, nz_) - - if (.not.allocated(a%val)) then - allocate(a%val(m, nz_)) - allocate(a%ja(m, nz_)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - end if - - a%val = dzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 index 6c24098e..384701ce 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_d_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 index 991681e9..6622a642 100644 --- a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 index 2c5563ce..466594fb 100644 --- a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 +++ b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_d_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_d_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 index 9214ba3f..7bddced9 100644 --- a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_d_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 index 292165fc..53e45b98 100644 --- a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 index 11332472..130a931e 100644 --- a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 +++ b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_d_ell_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 index 47a6933b..a30a0b00 100644 --- a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 @@ -22,25 +22,8 @@ contains end if call a%psb_d_hll_sparse_mat%allocate(m, n, nz_) - - hksz = a%hksz - nhacks = (m + hksz - 1) / hksz - - if (.not.allocated(a%val)) then - allocate(a%val(nz_ * m)) - allocate(a%ja(nz_ * m)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - allocate(a%hkoffs(nhacks)) - end if - - a%val = dzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - a%hkoffs = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 index e39a29b7..34a31b0b 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_d_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 index a838e31e..7d14f65c 100644 --- a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 index ae1c3c94..ff6a4580 100644 --- a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 +++ b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_d_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_d_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 index 29494a39..0f0ce6f1 100644 --- a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_d_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 index e03e2f30..1fcfa4f6 100644 --- a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 index 412409d1..21f5c48a 100644 --- a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 +++ b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 @@ -15,8 +15,8 @@ contains info = psb_success_ call a%psb_d_hll_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 index dac62a65..8b3a05b1 100644 --- a/openacc/impl/psb_d_oacc_mlt_v.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ subroutine psb_d_oacc_mlt_v(x, y, info) - use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v + use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v implicit none class(psb_d_base_vect_type), intent(inout) :: x @@ -13,7 +13,7 @@ subroutine psb_d_oacc_mlt_v(x, y, info) n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) + select type(xx => x) class is (psb_d_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index 3f3a457d..ce460924 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2 + use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2 use psb_string_mod implicit none real(psb_dpk_), intent(in) :: alpha, beta @@ -18,9 +18,9 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) info = 0 - select type(xx => x) + select type(xx => x) class is (psb_d_vect_oacc) - select type (yy => y) + select type (yy => y) class is (psb_d_vect_oacc) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() diff --git a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 index 08c51bce..0d531129 100644 --- a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 +++ b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 @@ -15,14 +15,8 @@ contains info = psb_success_ call a%psb_s_csr_sparse_mat%allocate(m, n, nz) - - if (.not.allocated(a%val)) then - allocate(a%val(nz)) - allocate(a%ja(nz)) - allocate(a%irp(m+1)) - end if - - call a%set_dev() + call a%set_host() + call a%sync_dev_space() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 index 3c2fb1cb..f740cf18 100644 --- a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_s_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 index e47959f8..fc495668 100644 --- a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 index ec7f4bad..b1785b49 100644 --- a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_s_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_s_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 index fdbf3a0c..0c92e476 100644 --- a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_s_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 index a7a581b8..1c61eb3b 100644 --- a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 index 77c17120..e49bf2c8 100644 --- a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 +++ b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_s_csr_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 index 38c19b78..b9c25654 100644 --- a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 +++ b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 @@ -21,20 +21,8 @@ contains end if call a%psb_s_ell_sparse_mat%allocate(m, n, nz_) - - if (.not.allocated(a%val)) then - allocate(a%val(m, nz_)) - allocate(a%ja(m, nz_)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - end if - - a%val = szero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 index 0f6fbc48..5a607370 100644 --- a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_s_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 index 793c2779..47ce5241 100644 --- a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 index ba42af12..5fc672d0 100644 --- a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 +++ b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_s_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_s_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 index ba82049f..736d4253 100644 --- a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_s_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 index df789664..d8d13aa2 100644 --- a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 index 373c2b67..34036cdb 100644 --- a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 +++ b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_s_ell_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 index c67ea621..1db3e55d 100644 --- a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 +++ b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 @@ -22,25 +22,8 @@ contains end if call a%psb_s_hll_sparse_mat%allocate(m, n, nz_) - - hksz = a%hksz - nhacks = (m + hksz - 1) / hksz - - if (.not.allocated(a%val)) then - allocate(a%val(nz_ * m)) - allocate(a%ja(nz_ * m)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - allocate(a%hkoffs(nhacks)) - end if - - a%val = szero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - a%hkoffs = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 index dfba3c6c..ca4afeb7 100644 --- a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_s_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 index 849e03b7..47a41b8d 100644 --- a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_s_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 index 900b8982..ea81574b 100644 --- a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 +++ b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_s_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_s_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 index c22818fa..c26c5018 100644 --- a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_s_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 index 992b1c7b..dd04bbab 100644 --- a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_s_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 index 7768d1ed..242ffe0d 100644 --- a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 +++ b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 @@ -15,8 +15,8 @@ contains info = psb_success_ call a%psb_s_hll_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90 index 61a1d152..11dcac6c 100644 --- a/openacc/impl/psb_s_oacc_mlt_v.f90 +++ b/openacc/impl/psb_s_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ subroutine psb_s_oacc_mlt_v(x, y, info) - use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v + use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v implicit none class(psb_s_base_vect_type), intent(inout) :: x @@ -13,7 +13,7 @@ subroutine psb_s_oacc_mlt_v(x, y, info) n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) + select type(xx => x) class is (psb_s_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90 index bcaebfbe..2ce7fe53 100644 --- a/openacc/impl/psb_s_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2 + use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2 use psb_string_mod implicit none real(psb_spk_), intent(in) :: alpha, beta @@ -18,9 +18,9 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) info = 0 - select type(xx => x) + select type(xx => x) class is (psb_s_vect_oacc) - select type (yy => y) + select type (yy => y) class is (psb_s_vect_oacc) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() diff --git a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 index fd19d6f9..b7ba4bad 100644 --- a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 +++ b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 @@ -15,14 +15,8 @@ contains info = psb_success_ call a%psb_z_csr_sparse_mat%allocate(m, n, nz) - - if (.not.allocated(a%val)) then - allocate(a%val(nz)) - allocate(a%ja(nz)) - allocate(a%irp(m+1)) - end if - - call a%set_dev() + call a%set_host() + call a%sync_dev_space() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 index 6c40d0d2..ef1b5b73 100644 --- a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_z_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 index 3025fde2..7c3470a2 100644 --- a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 index 4975b276..25715903 100644 --- a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_z_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_z_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_z_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 index 6dae625a..d2b9ee48 100644 --- a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_z_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 index 1d7dd723..af7fb1da 100644 --- a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 index bdfb88d6..4cfea07c 100644 --- a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 +++ b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_z_csr_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 index 48a5e202..aa8a03cd 100644 --- a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 +++ b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 @@ -21,20 +21,8 @@ contains end if call a%psb_z_ell_sparse_mat%allocate(m, n, nz_) - - if (.not.allocated(a%val)) then - allocate(a%val(m, nz_)) - allocate(a%ja(m, nz_)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - end if - - a%val = zzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 index 6dd60bd7..a3129ff6 100644 --- a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_z_ell_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 index 60d94bb2..f9a52d62 100644 --- a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 index f445a6b4..1cbe249c 100644 --- a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 +++ b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_z_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_z_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 index db70b944..9abb1a74 100644 --- a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_z_ell_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 index f99b3817..eb49b21a 100644 --- a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 index 8fd3ad77..c7c88c65 100644 --- a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 +++ b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 @@ -14,8 +14,8 @@ contains info = psb_success_ call a%psb_z_ell_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 index c398d259..fb84dfb7 100644 --- a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 +++ b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 @@ -22,25 +22,8 @@ contains end if call a%psb_z_hll_sparse_mat%allocate(m, n, nz_) - - hksz = a%hksz - nhacks = (m + hksz - 1) / hksz - - if (.not.allocated(a%val)) then - allocate(a%val(nz_ * m)) - allocate(a%ja(nz_ * m)) - allocate(a%irn(m)) - allocate(a%idiag(m)) - allocate(a%hkoffs(nhacks)) - end if - - a%val = zzero - a%ja = -1 - a%irn = 0 - a%idiag = 0 - a%hkoffs = 0 - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 index e018e762..9fb903ca 100644 --- a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_z_hll_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 index 7b18b255..3a77865d 100644 --- a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_z_coo_sparse_mat) call a%cp_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 index 1d068542..a5bd54ad 100644 --- a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 +++ b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 @@ -40,9 +40,9 @@ contains call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) call y%set_host() else - select type (xx => x) + select type (xx => x) type is (psb_z_vect_oacc) - select type(yy => y) + select type(yy => y) type is (psb_z_vect_oacc) if (xx%is_host()) call xx%sync() if (beta /= dzero) then diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 index 267f13f3..46d4d6bd 100644 --- a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 +++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 @@ -10,10 +10,10 @@ contains info = psb_success_ - call a%free_space() + call a%free_dev_space() call a%psb_z_hll_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 index 151dc6ce..45e9846a 100644 --- a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 +++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 @@ -14,10 +14,10 @@ contains type is (psb_z_coo_sparse_mat) call a%mv_from_coo(b, info) class default - call a%free_space() + call a%free_dev_space() call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - call a%sync_space() + call a%sync_dev_space() call a%set_host() call a%sync() end select diff --git a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 index 5b49efe5..7d9589cb 100644 --- a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 +++ b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 @@ -15,8 +15,8 @@ contains info = psb_success_ call a%psb_z_hll_sparse_mat%reallocate(nz) - - call a%set_dev() + call a%sync_dev_space() + call a%set_host() if (info /= 0) goto 9999 call psb_erractionrestore(err_act) diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90 index 4bc582d2..3e8f6030 100644 --- a/openacc/impl/psb_z_oacc_mlt_v.f90 +++ b/openacc/impl/psb_z_oacc_mlt_v.f90 @@ -1,6 +1,6 @@ subroutine psb_z_oacc_mlt_v(x, y, info) - use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v + use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v implicit none class(psb_z_base_vect_type), intent(inout) :: x @@ -13,7 +13,7 @@ subroutine psb_z_oacc_mlt_v(x, y, info) n = min(x%get_nrows(), y%get_nrows()) info = 0 n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) + select type(xx => x) class is (psb_z_vect_oacc) if (y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90 index 337a0a96..f69d863c 100644 --- a/openacc/impl/psb_z_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90 @@ -1,5 +1,5 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2 + use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2 use psb_string_mod implicit none complex(psb_dpk_), intent(in) :: alpha, beta @@ -18,9 +18,9 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) info = 0 - select type(xx => x) + select type(xx => x) class is (psb_z_vect_oacc) - select type (yy => y) + select type (yy => y) class is (psb_z_vect_oacc) if (xx%is_host()) call xx%sync() if (yy%is_host()) call yy%sync() diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 index 07734762..a1f6e2c3 100644 --- a/openacc/psb_c_oacc_csr_mat_mod.F90 +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -13,32 +13,32 @@ module psb_c_oacc_csr_mat_mod type, extends(psb_c_csr_sparse_mat) :: psb_c_oacc_csr_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => c_oacc_csr_get_fmt - procedure, pass(a) :: sizeof => c_oacc_csr_sizeof - procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv - procedure, pass(a) :: scals => psb_c_oacc_csr_scals - procedure, pass(a) :: scalv => psb_c_oacc_csr_scal - procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt - procedure, pass(a) :: free => c_oacc_csr_free - procedure, pass(a) :: mold => psb_c_oacc_csr_mold - procedure, pass(a) :: all => c_oacc_csr_all - procedure, pass(a) :: is_host => c_oacc_csr_is_host - procedure, pass(a) :: is_sync => c_oacc_csr_is_sync - procedure, pass(a) :: is_dev => c_oacc_csr_is_dev - procedure, pass(a) :: set_host => c_oacc_csr_set_host - procedure, pass(a) :: set_sync => c_oacc_csr_set_sync - procedure, pass(a) :: set_dev => c_oacc_csr_set_dev - procedure, pass(a) :: free_space => c_oacc_csr_free_space - procedure, pass(a) :: sync_space => c_oacc_csr_sync_space - procedure, pass(a) :: sync => c_oacc_csr_sync + procedure, nopass :: get_fmt => c_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => c_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv + procedure, pass(a) :: scals => psb_c_oacc_csr_scals + procedure, pass(a) :: scalv => psb_c_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => c_oacc_csr_free + procedure, pass(a) :: mold => psb_c_oacc_csr_mold + procedure, pass(a) :: all => c_oacc_csr_all + procedure, pass(a) :: is_host => c_oacc_csr_is_host + procedure, pass(a) :: is_sync => c_oacc_csr_is_sync + procedure, pass(a) :: is_dev => c_oacc_csr_is_dev + procedure, pass(a) :: set_host => c_oacc_csr_set_host + procedure, pass(a) :: set_sync => c_oacc_csr_set_sync + procedure, pass(a) :: set_dev => c_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => c_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => c_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => c_oacc_csr_sync end type psb_c_oacc_csr_sparse_mat interface @@ -156,7 +156,7 @@ module psb_c_oacc_csr_mat_mod contains - subroutine c_oacc_csr_free_space(a) + subroutine c_oacc_csr_free_dev_space(a) use psb_base_mod implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%irp)) call acc_delete_finalize(a%irp) return - end subroutine c_oacc_csr_free_space + end subroutine c_oacc_csr_free_dev_space subroutine c_oacc_csr_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_c_csr_sparse_mat%free() return @@ -218,7 +218,7 @@ contains allocate(a%ja(nz),stat=info) allocate(a%irp(m+1),stat=info) if (info == 0) call a%set_host() - if (info == 0) call a%sync_space() + if (info == 0) call a%sync_dev_space() end subroutine c_oacc_csr_all function c_oacc_csr_is_host(a) result(res) @@ -266,13 +266,13 @@ contains a%devstate = is_dev end subroutine c_oacc_csr_set_dev - subroutine c_oacc_csr_sync_space(a) + subroutine c_oacc_csr_sync_dev_space(a) implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a if (allocated(a%val)) call acc_create(a%val) if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irp)) call acc_create(a%irp) - end subroutine c_oacc_csr_sync_space + end subroutine c_oacc_csr_sync_dev_space subroutine c_oacc_csr_sync(a) implicit none @@ -280,7 +280,7 @@ contains class(psb_c_oacc_csr_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index d23d4d0a..996d4628 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_c_oacc_ell_mat_mod type, extends(psb_c_ell_sparse_mat) :: psb_c_oacc_ell_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => c_oacc_ell_get_fmt - procedure, pass(a) :: sizeof => c_oacc_ell_sizeof - procedure, pass(a) :: is_host => c_oacc_ell_is_host - procedure, pass(a) :: is_sync => c_oacc_ell_is_sync - procedure, pass(a) :: is_dev => c_oacc_ell_is_dev - procedure, pass(a) :: set_host => c_oacc_ell_set_host - procedure, pass(a) :: set_sync => c_oacc_ell_set_sync - procedure, pass(a) :: set_dev => c_oacc_ell_set_dev - procedure, pass(a) :: sync_space => c_oacc_ell_sync_space - procedure, pass(a) :: sync => c_oacc_ell_sync - procedure, pass(a) :: free_space => c_oacc_ell_free_space - procedure, pass(a) :: free => c_oacc_ell_free - procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv - procedure, pass(a) :: scals => psb_c_oacc_ell_scals - procedure, pass(a) :: scalv => psb_c_oacc_ell_scal - procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt - procedure, pass(a) :: mold => psb_c_oacc_ell_mold + procedure, nopass :: get_fmt => c_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => c_oacc_ell_sizeof + procedure, pass(a) :: is_host => c_oacc_ell_is_host + procedure, pass(a) :: is_sync => c_oacc_ell_is_sync + procedure, pass(a) :: is_dev => c_oacc_ell_is_dev + procedure, pass(a) :: set_host => c_oacc_ell_set_host + procedure, pass(a) :: set_sync => c_oacc_ell_set_sync + procedure, pass(a) :: set_dev => c_oacc_ell_set_dev + procedure, pass(a) :: sync_dev_space => c_oacc_ell_sync_dev_space + procedure, pass(a) :: sync => c_oacc_ell_sync + procedure, pass(a) :: free_dev_space => c_oacc_ell_free_dev_space + procedure, pass(a) :: free => c_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv + procedure, pass(a) :: scals => psb_c_oacc_ell_scals + procedure, pass(a) :: scalv => psb_c_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_ell_mold end type psb_c_oacc_ell_sparse_mat @@ -154,7 +154,7 @@ module psb_c_oacc_ell_mat_mod contains - subroutine c_oacc_ell_free_space(a) + subroutine c_oacc_ell_free_dev_space(a) use psb_base_mod implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a @@ -166,7 +166,7 @@ contains if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) return - end subroutine c_oacc_ell_free_space + end subroutine c_oacc_ell_free_dev_space subroutine c_oacc_ell_free(a) use psb_base_mod @@ -174,7 +174,7 @@ contains class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_c_ell_sparse_mat%free() return @@ -195,7 +195,7 @@ contains end function c_oacc_ell_sizeof - subroutine c_oacc_ell_sync_space(a) + subroutine c_oacc_ell_sync_dev_space(a) implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a @@ -203,7 +203,7 @@ contains if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) - end subroutine c_oacc_ell_sync_space + end subroutine c_oacc_ell_sync_dev_space function c_oacc_ell_is_host(a) result(res) implicit none @@ -262,7 +262,7 @@ contains class(psb_c_oacc_ell_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 index 2d2095bf..ebcbf29e 100644 --- a/openacc/psb_c_oacc_hll_mat_mod.F90 +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_c_oacc_hll_mat_mod type, extends(psb_c_hll_sparse_mat) :: psb_c_oacc_hll_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => c_oacc_hll_get_fmt - procedure, pass(a) :: sizeof => c_oacc_hll_sizeof - procedure, pass(a) :: is_host => c_oacc_hll_is_host - procedure, pass(a) :: is_sync => c_oacc_hll_is_sync - procedure, pass(a) :: is_dev => c_oacc_hll_is_dev - procedure, pass(a) :: set_host => c_oacc_hll_set_host - procedure, pass(a) :: set_sync => c_oacc_hll_set_sync - procedure, pass(a) :: set_dev => c_oacc_hll_set_dev - procedure, pass(a) :: sync_space => c_oacc_hll_sync_space - procedure, pass(a) :: sync => c_oacc_hll_sync - procedure, pass(a) :: free_space => c_oacc_hll_free_space - procedure, pass(a) :: free => c_oacc_hll_free - procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv - procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv - procedure, pass(a) :: scals => psb_c_oacc_hll_scals - procedure, pass(a) :: scalv => psb_c_oacc_hll_scal - procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt - procedure, pass(a) :: mold => psb_c_oacc_hll_mold + procedure, nopass :: get_fmt => c_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => c_oacc_hll_sizeof + procedure, pass(a) :: is_host => c_oacc_hll_is_host + procedure, pass(a) :: is_sync => c_oacc_hll_is_sync + procedure, pass(a) :: is_dev => c_oacc_hll_is_dev + procedure, pass(a) :: set_host => c_oacc_hll_set_host + procedure, pass(a) :: set_sync => c_oacc_hll_set_sync + procedure, pass(a) :: set_dev => c_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => c_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => c_oacc_hll_sync + procedure, pass(a) :: free_dev_space => c_oacc_hll_free_dev_space + procedure, pass(a) :: free => c_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv + procedure, pass(a) :: scals => psb_c_oacc_hll_scals + procedure, pass(a) :: scalv => psb_c_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_c_oacc_hll_mold end type psb_c_oacc_hll_sparse_mat @@ -154,7 +154,7 @@ module psb_c_oacc_hll_mat_mod contains - subroutine c_oacc_hll_free_space(a) + subroutine c_oacc_hll_free_dev_space(a) use psb_base_mod implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) return - end subroutine c_oacc_hll_free_space + end subroutine c_oacc_hll_free_dev_space subroutine c_oacc_hll_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_c_hll_sparse_mat%free() return @@ -249,7 +249,7 @@ contains res = 'HLLOA' end function c_oacc_hll_get_fmt - subroutine c_oacc_hll_sync_space(a) + subroutine c_oacc_hll_sync_dev_space(a) implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a @@ -258,7 +258,7 @@ contains if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) if (allocated(a%hkoffs)) call acc_create(a%hkoffs) - end subroutine c_oacc_hll_sync_space + end subroutine c_oacc_hll_sync_dev_space subroutine c_oacc_hll_sync(a) @@ -267,7 +267,7 @@ contains class(psb_c_oacc_hll_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 79ff0ca3..95c45646 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -15,50 +15,50 @@ module psb_c_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => c_oacc_get_nrows - procedure, nopass :: get_fmt => c_oacc_get_fmt - - procedure, pass(x) :: all => c_oacc_vect_all - procedure, pass(x) :: zero => c_oacc_zero - procedure, pass(x) :: asb_m => c_oacc_asb_m - procedure, pass(x) :: sync => c_oacc_sync - procedure, pass(x) :: sync_space => c_oacc_sync_space - procedure, pass(x) :: bld_x => c_oacc_bld_x - procedure, pass(x) :: bld_mn => c_oacc_bld_mn - procedure, pass(x) :: free => c_oacc_vect_free - procedure, pass(x) :: ins_a => c_oacc_ins_a - procedure, pass(x) :: ins_v => c_oacc_ins_v - procedure, pass(x) :: is_host => c_oacc_is_host - procedure, pass(x) :: is_dev => c_oacc_is_dev - procedure, pass(x) :: is_sync => c_oacc_is_sync - procedure, pass(x) :: set_host => c_oacc_set_host - procedure, pass(x) :: set_dev => c_oacc_set_dev - procedure, pass(x) :: set_sync => c_oacc_set_sync - procedure, pass(x) :: set_scal => c_oacc_set_scal - - procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf - procedure, pass(y) :: sctb => c_oacc_sctb - procedure, pass(y) :: sctb_x => c_oacc_sctb_x - procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf - - procedure, pass(x) :: get_size => c_oacc_get_size - - procedure, pass(x) :: dot_v => c_oacc_vect_dot - procedure, pass(x) :: dot_a => c_oacc_dot_a - procedure, pass(y) :: axpby_v => c_oacc_axpby_v - procedure, pass(y) :: axpby_a => c_oacc_axpby_a - procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz - procedure, pass(y) :: mlt_a => c_oacc_mlt_a - procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2 - procedure, pass(x) :: scal => c_oacc_scal - procedure, pass(x) :: nrm2 => c_oacc_nrm2 - procedure, pass(x) :: amax => c_oacc_amax - procedure, pass(x) :: asum => c_oacc_asum - procedure, pass(x) :: absval1 => c_oacc_absval1 - procedure, pass(x) :: absval2 => c_oacc_absval2 + procedure, pass(x) :: get_nrows => c_oacc_get_nrows + procedure, nopass :: get_fmt => c_oacc_get_fmt + + procedure, pass(x) :: all => c_oacc_vect_all + procedure, pass(x) :: zero => c_oacc_zero + procedure, pass(x) :: asb_m => c_oacc_asb_m + procedure, pass(x) :: sync => c_oacc_sync + procedure, pass(x) :: sync_dev_space => c_oacc_sync_dev_space + procedure, pass(x) :: bld_x => c_oacc_bld_x + procedure, pass(x) :: bld_mn => c_oacc_bld_mn + procedure, pass(x) :: free => c_oacc_vect_free + procedure, pass(x) :: ins_a => c_oacc_ins_a + procedure, pass(x) :: ins_v => c_oacc_ins_v + procedure, pass(x) :: is_host => c_oacc_is_host + procedure, pass(x) :: is_dev => c_oacc_is_dev + procedure, pass(x) :: is_sync => c_oacc_is_sync + procedure, pass(x) :: set_host => c_oacc_set_host + procedure, pass(x) :: set_dev => c_oacc_set_dev + procedure, pass(x) :: set_sync => c_oacc_set_sync + procedure, pass(x) :: set_scal => c_oacc_set_scal + + procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf + procedure, pass(y) :: sctb => c_oacc_sctb + procedure, pass(y) :: sctb_x => c_oacc_sctb_x + procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf + + procedure, pass(x) :: get_size => c_oacc_get_size + + procedure, pass(x) :: dot_v => c_oacc_vect_dot + procedure, pass(x) :: dot_a => c_oacc_dot_a + procedure, pass(y) :: axpby_v => c_oacc_axpby_v + procedure, pass(y) :: axpby_a => c_oacc_axpby_a + procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz + procedure, pass(y) :: mlt_a => c_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2 + procedure, pass(x) :: scal => c_oacc_scal + procedure, pass(x) :: nrm2 => c_oacc_nrm2 + procedure, pass(x) :: amax => c_oacc_amax + procedure, pass(x) :: asum => c_oacc_asum + procedure, pass(x) :: absval1 => c_oacc_absval1 + procedure, pass(x) :: absval2 => c_oacc_absval2 final :: c_oacc_final_vect_free end type psb_c_vect_oacc @@ -117,7 +117,7 @@ contains integer(psb_ipk_) :: i n = min(size(x%v), size(y%v)) - select type (yy => y) + select type (yy => y) class is (psb_c_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() @@ -287,7 +287,7 @@ contains info = psb_success_ - select type(xx => x) + select type(xx => x) type is (psb_c_vect_oacc) if ((beta /= czero) .and. y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -354,11 +354,11 @@ contains info = psb_success_ gpu_done = .false. - select type(xx => x) + select type(xx => x) class is (psb_c_vect_oacc) - select type(yy => y) + select type(yy => y) class is (psb_c_vect_oacc) - select type(zz => z) + select type(zz => z) class is (psb_c_vect_oacc) if ((beta /= czero) .and. yy%is_host()) call yy%sync() if ((delta /= czero) .and. zz%is_host()) call zz%sync() @@ -405,7 +405,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() @@ -432,7 +432,7 @@ contains class(psb_c_vect_oacc) :: y integer(psb_ipk_) :: info, ni - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -485,7 +485,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -512,7 +512,7 @@ contains info = 0 - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -544,9 +544,9 @@ contains if (psb_errstatus_fatal()) return done_oacc = .false. - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) - select type(vval => val) + select type(vval => val) type is (psb_c_vect_oacc) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() @@ -561,11 +561,11 @@ contains end select if (.not.done_oacc) then - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) if (virl%is_dev()) call virl%sync() end select - select type(vval => val) + select type(vval => val) type is (psb_c_vect_oacc) if (vval%is_dev()) call vval%sync() end select @@ -611,7 +611,7 @@ contains call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine c_oacc_bld_mn @@ -633,7 +633,7 @@ contains end if x%v(:) = this(:) call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine c_oacc_bld_x @@ -719,7 +719,7 @@ contains res = czero !write(0,*) 'dot_v' - select type(yy => y) + select type(yy => y) type is (psb_c_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n, x%v, 1, yy%v, 1) @@ -762,11 +762,11 @@ contains end function c_oacc_dot_a - subroutine c_oacc_sync_space(x) + subroutine c_oacc_sync_dev_space(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x if (allocated(x%v)) call acc_create(x%v) - end subroutine c_oacc_sync_space + end subroutine c_oacc_sync_dev_space subroutine c_oacc_sync(x) implicit none @@ -840,7 +840,7 @@ contains i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine c_oacc_vect_all subroutine c_oacc_final_vect_free(x) diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index 74031e89..a9d72eae 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -13,32 +13,32 @@ module psb_d_oacc_csr_mat_mod type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => d_oacc_csr_get_fmt - procedure, pass(a) :: sizeof => d_oacc_csr_sizeof - procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv - procedure, pass(a) :: scals => psb_d_oacc_csr_scals - procedure, pass(a) :: scalv => psb_d_oacc_csr_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt - procedure, pass(a) :: free => d_oacc_csr_free - procedure, pass(a) :: mold => psb_d_oacc_csr_mold - procedure, pass(a) :: all => d_oacc_csr_all - procedure, pass(a) :: is_host => d_oacc_csr_is_host - procedure, pass(a) :: is_sync => d_oacc_csr_is_sync - procedure, pass(a) :: is_dev => d_oacc_csr_is_dev - procedure, pass(a) :: set_host => d_oacc_csr_set_host - procedure, pass(a) :: set_sync => d_oacc_csr_set_sync - procedure, pass(a) :: set_dev => d_oacc_csr_set_dev - procedure, pass(a) :: free_space => d_oacc_csr_free_space - procedure, pass(a) :: sync_space => d_oacc_csr_sync_space - procedure, pass(a) :: sync => d_oacc_csr_sync + procedure, nopass :: get_fmt => d_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => d_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv + procedure, pass(a) :: scals => psb_d_oacc_csr_scals + procedure, pass(a) :: scalv => psb_d_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => d_oacc_csr_free + procedure, pass(a) :: mold => psb_d_oacc_csr_mold + procedure, pass(a) :: all => d_oacc_csr_all + procedure, pass(a) :: is_host => d_oacc_csr_is_host + procedure, pass(a) :: is_sync => d_oacc_csr_is_sync + procedure, pass(a) :: is_dev => d_oacc_csr_is_dev + procedure, pass(a) :: set_host => d_oacc_csr_set_host + procedure, pass(a) :: set_sync => d_oacc_csr_set_sync + procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => d_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => d_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => d_oacc_csr_sync end type psb_d_oacc_csr_sparse_mat interface @@ -156,7 +156,7 @@ module psb_d_oacc_csr_mat_mod contains - subroutine d_oacc_csr_free_space(a) + subroutine d_oacc_csr_free_dev_space(a) use psb_base_mod implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%irp)) call acc_delete_finalize(a%irp) return - end subroutine d_oacc_csr_free_space + end subroutine d_oacc_csr_free_dev_space subroutine d_oacc_csr_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_d_csr_sparse_mat%free() return @@ -218,7 +218,7 @@ contains allocate(a%ja(nz),stat=info) allocate(a%irp(m+1),stat=info) if (info == 0) call a%set_host() - if (info == 0) call a%sync_space() + if (info == 0) call a%sync_dev_space() end subroutine d_oacc_csr_all function d_oacc_csr_is_host(a) result(res) @@ -266,13 +266,13 @@ contains a%devstate = is_dev end subroutine d_oacc_csr_set_dev - subroutine d_oacc_csr_sync_space(a) + subroutine d_oacc_csr_sync_dev_space(a) implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a if (allocated(a%val)) call acc_create(a%val) if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irp)) call acc_create(a%irp) - end subroutine d_oacc_csr_sync_space + end subroutine d_oacc_csr_sync_dev_space subroutine d_oacc_csr_sync(a) implicit none @@ -280,7 +280,7 @@ contains class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 45ffc92d..1409a1d6 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_d_oacc_ell_mat_mod type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => d_oacc_ell_get_fmt - procedure, pass(a) :: sizeof => d_oacc_ell_sizeof - procedure, pass(a) :: is_host => d_oacc_ell_is_host - procedure, pass(a) :: is_sync => d_oacc_ell_is_sync - procedure, pass(a) :: is_dev => d_oacc_ell_is_dev - procedure, pass(a) :: set_host => d_oacc_ell_set_host - procedure, pass(a) :: set_sync => d_oacc_ell_set_sync - procedure, pass(a) :: set_dev => d_oacc_ell_set_dev - procedure, pass(a) :: sync_space => d_oacc_ell_sync_space - procedure, pass(a) :: sync => d_oacc_ell_sync - procedure, pass(a) :: free_space => d_oacc_ell_free_space - procedure, pass(a) :: free => d_oacc_ell_free - procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv - procedure, pass(a) :: scals => psb_d_oacc_ell_scals - procedure, pass(a) :: scalv => psb_d_oacc_ell_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt - procedure, pass(a) :: mold => psb_d_oacc_ell_mold + procedure, nopass :: get_fmt => d_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => d_oacc_ell_sizeof + procedure, pass(a) :: is_host => d_oacc_ell_is_host + procedure, pass(a) :: is_sync => d_oacc_ell_is_sync + procedure, pass(a) :: is_dev => d_oacc_ell_is_dev + procedure, pass(a) :: set_host => d_oacc_ell_set_host + procedure, pass(a) :: set_sync => d_oacc_ell_set_sync + procedure, pass(a) :: set_dev => d_oacc_ell_set_dev + procedure, pass(a) :: sync_dev_space => d_oacc_ell_sync_dev_space + procedure, pass(a) :: sync => d_oacc_ell_sync + procedure, pass(a) :: free_dev_space => d_oacc_ell_free_dev_space + procedure, pass(a) :: free => d_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv + procedure, pass(a) :: scals => psb_d_oacc_ell_scals + procedure, pass(a) :: scalv => psb_d_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_ell_mold end type psb_d_oacc_ell_sparse_mat @@ -154,7 +154,7 @@ module psb_d_oacc_ell_mat_mod contains - subroutine d_oacc_ell_free_space(a) + subroutine d_oacc_ell_free_dev_space(a) use psb_base_mod implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a @@ -166,7 +166,7 @@ contains if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) return - end subroutine d_oacc_ell_free_space + end subroutine d_oacc_ell_free_dev_space subroutine d_oacc_ell_free(a) use psb_base_mod @@ -174,7 +174,7 @@ contains class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_d_ell_sparse_mat%free() return @@ -195,7 +195,7 @@ contains end function d_oacc_ell_sizeof - subroutine d_oacc_ell_sync_space(a) + subroutine d_oacc_ell_sync_dev_space(a) implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a @@ -203,7 +203,7 @@ contains if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) - end subroutine d_oacc_ell_sync_space + end subroutine d_oacc_ell_sync_dev_space function d_oacc_ell_is_host(a) result(res) implicit none @@ -262,7 +262,7 @@ contains class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index 8009f085..15ae055e 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_d_oacc_hll_mat_mod type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => d_oacc_hll_get_fmt - procedure, pass(a) :: sizeof => d_oacc_hll_sizeof - procedure, pass(a) :: is_host => d_oacc_hll_is_host - procedure, pass(a) :: is_sync => d_oacc_hll_is_sync - procedure, pass(a) :: is_dev => d_oacc_hll_is_dev - procedure, pass(a) :: set_host => d_oacc_hll_set_host - procedure, pass(a) :: set_sync => d_oacc_hll_set_sync - procedure, pass(a) :: set_dev => d_oacc_hll_set_dev - procedure, pass(a) :: sync_space => d_oacc_hll_sync_space - procedure, pass(a) :: sync => d_oacc_hll_sync - procedure, pass(a) :: free_space => d_oacc_hll_free_space - procedure, pass(a) :: free => d_oacc_hll_free - procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv - procedure, pass(a) :: scals => psb_d_oacc_hll_scals - procedure, pass(a) :: scalv => psb_d_oacc_hll_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt - procedure, pass(a) :: mold => psb_d_oacc_hll_mold + procedure, nopass :: get_fmt => d_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => d_oacc_hll_sizeof + procedure, pass(a) :: is_host => d_oacc_hll_is_host + procedure, pass(a) :: is_sync => d_oacc_hll_is_sync + procedure, pass(a) :: is_dev => d_oacc_hll_is_dev + procedure, pass(a) :: set_host => d_oacc_hll_set_host + procedure, pass(a) :: set_sync => d_oacc_hll_set_sync + procedure, pass(a) :: set_dev => d_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => d_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free_dev_space => d_oacc_hll_free_dev_space + procedure, pass(a) :: free => d_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv + procedure, pass(a) :: scals => psb_d_oacc_hll_scals + procedure, pass(a) :: scalv => psb_d_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_hll_mold end type psb_d_oacc_hll_sparse_mat @@ -154,7 +154,7 @@ module psb_d_oacc_hll_mat_mod contains - subroutine d_oacc_hll_free_space(a) + subroutine d_oacc_hll_free_dev_space(a) use psb_base_mod implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) return - end subroutine d_oacc_hll_free_space + end subroutine d_oacc_hll_free_dev_space subroutine d_oacc_hll_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_d_hll_sparse_mat%free() return @@ -249,7 +249,7 @@ contains res = 'HLLOA' end function d_oacc_hll_get_fmt - subroutine d_oacc_hll_sync_space(a) + subroutine d_oacc_hll_sync_dev_space(a) implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a @@ -258,7 +258,7 @@ contains if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) if (allocated(a%hkoffs)) call acc_create(a%hkoffs) - end subroutine d_oacc_hll_sync_space + end subroutine d_oacc_hll_sync_dev_space subroutine d_oacc_hll_sync(a) @@ -267,7 +267,7 @@ contains class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index c7804bc1..3d71e54c 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -15,50 +15,50 @@ module psb_d_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => d_oacc_get_nrows - procedure, nopass :: get_fmt => d_oacc_get_fmt - - procedure, pass(x) :: all => d_oacc_vect_all - procedure, pass(x) :: zero => d_oacc_zero - procedure, pass(x) :: asb_m => d_oacc_asb_m - procedure, pass(x) :: sync => d_oacc_sync - procedure, pass(x) :: sync_space => d_oacc_sync_space - procedure, pass(x) :: bld_x => d_oacc_bld_x - procedure, pass(x) :: bld_mn => d_oacc_bld_mn - procedure, pass(x) :: free => d_oacc_vect_free - procedure, pass(x) :: ins_a => d_oacc_ins_a - procedure, pass(x) :: ins_v => d_oacc_ins_v - procedure, pass(x) :: is_host => d_oacc_is_host - procedure, pass(x) :: is_dev => d_oacc_is_dev - procedure, pass(x) :: is_sync => d_oacc_is_sync - procedure, pass(x) :: set_host => d_oacc_set_host - procedure, pass(x) :: set_dev => d_oacc_set_dev - procedure, pass(x) :: set_sync => d_oacc_set_sync - procedure, pass(x) :: set_scal => d_oacc_set_scal - - procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf - procedure, pass(y) :: sctb => d_oacc_sctb - procedure, pass(y) :: sctb_x => d_oacc_sctb_x - procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf - - procedure, pass(x) :: get_size => d_oacc_get_size - - procedure, pass(x) :: dot_v => d_oacc_vect_dot - procedure, pass(x) :: dot_a => d_oacc_dot_a - procedure, pass(y) :: axpby_v => d_oacc_axpby_v - procedure, pass(y) :: axpby_a => d_oacc_axpby_a - procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz - procedure, pass(y) :: mlt_a => d_oacc_mlt_a - procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 - procedure, pass(x) :: scal => d_oacc_scal - procedure, pass(x) :: nrm2 => d_oacc_nrm2 - procedure, pass(x) :: amax => d_oacc_amax - procedure, pass(x) :: asum => d_oacc_asum - procedure, pass(x) :: absval1 => d_oacc_absval1 - procedure, pass(x) :: absval2 => d_oacc_absval2 + procedure, pass(x) :: get_nrows => d_oacc_get_nrows + procedure, nopass :: get_fmt => d_oacc_get_fmt + + procedure, pass(x) :: all => d_oacc_vect_all + procedure, pass(x) :: zero => d_oacc_zero + procedure, pass(x) :: asb_m => d_oacc_asb_m + procedure, pass(x) :: sync => d_oacc_sync + procedure, pass(x) :: sync_dev_space => d_oacc_sync_dev_space + procedure, pass(x) :: bld_x => d_oacc_bld_x + procedure, pass(x) :: bld_mn => d_oacc_bld_mn + procedure, pass(x) :: free => d_oacc_vect_free + procedure, pass(x) :: ins_a => d_oacc_ins_a + procedure, pass(x) :: ins_v => d_oacc_ins_v + procedure, pass(x) :: is_host => d_oacc_is_host + procedure, pass(x) :: is_dev => d_oacc_is_dev + procedure, pass(x) :: is_sync => d_oacc_is_sync + procedure, pass(x) :: set_host => d_oacc_set_host + procedure, pass(x) :: set_dev => d_oacc_set_dev + procedure, pass(x) :: set_sync => d_oacc_set_sync + procedure, pass(x) :: set_scal => d_oacc_set_scal + + procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf + procedure, pass(y) :: sctb => d_oacc_sctb + procedure, pass(y) :: sctb_x => d_oacc_sctb_x + procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf + + procedure, pass(x) :: get_size => d_oacc_get_size + + procedure, pass(x) :: dot_v => d_oacc_vect_dot + procedure, pass(x) :: dot_a => d_oacc_dot_a + procedure, pass(y) :: axpby_v => d_oacc_axpby_v + procedure, pass(y) :: axpby_a => d_oacc_axpby_a + procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz + procedure, pass(y) :: mlt_a => d_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 + procedure, pass(x) :: scal => d_oacc_scal + procedure, pass(x) :: nrm2 => d_oacc_nrm2 + procedure, pass(x) :: amax => d_oacc_amax + procedure, pass(x) :: asum => d_oacc_asum + procedure, pass(x) :: absval1 => d_oacc_absval1 + procedure, pass(x) :: absval2 => d_oacc_absval2 final :: d_oacc_final_vect_free end type psb_d_vect_oacc @@ -117,7 +117,7 @@ contains integer(psb_ipk_) :: i n = min(size(x%v), size(y%v)) - select type (yy => y) + select type (yy => y) class is (psb_d_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() @@ -287,7 +287,7 @@ contains info = psb_success_ - select type(xx => x) + select type(xx => x) type is (psb_d_vect_oacc) if ((beta /= dzero) .and. y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -354,11 +354,11 @@ contains info = psb_success_ gpu_done = .false. - select type(xx => x) + select type(xx => x) class is (psb_d_vect_oacc) - select type(yy => y) + select type(yy => y) class is (psb_d_vect_oacc) - select type(zz => z) + select type(zz => z) class is (psb_d_vect_oacc) if ((beta /= dzero) .and. yy%is_host()) call yy%sync() if ((delta /= dzero) .and. zz%is_host()) call zz%sync() @@ -405,7 +405,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() @@ -432,7 +432,7 @@ contains class(psb_d_vect_oacc) :: y integer(psb_ipk_) :: info, ni - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -485,7 +485,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -512,7 +512,7 @@ contains info = 0 - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -544,9 +544,9 @@ contains if (psb_errstatus_fatal()) return done_oacc = .false. - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) - select type(vval => val) + select type(vval => val) type is (psb_d_vect_oacc) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() @@ -561,11 +561,11 @@ contains end select if (.not.done_oacc) then - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) if (virl%is_dev()) call virl%sync() end select - select type(vval => val) + select type(vval => val) type is (psb_d_vect_oacc) if (vval%is_dev()) call vval%sync() end select @@ -611,7 +611,7 @@ contains call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine d_oacc_bld_mn @@ -633,7 +633,7 @@ contains end if x%v(:) = this(:) call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine d_oacc_bld_x @@ -719,7 +719,7 @@ contains res = dzero !write(0,*) 'dot_v' - select type(yy => y) + select type(yy => y) type is (psb_d_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n, x%v, 1, yy%v, 1) @@ -762,11 +762,11 @@ contains end function d_oacc_dot_a - subroutine d_oacc_sync_space(x) + subroutine d_oacc_sync_dev_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x if (allocated(x%v)) call acc_create(x%v) - end subroutine d_oacc_sync_space + end subroutine d_oacc_sync_dev_space subroutine d_oacc_sync(x) implicit none @@ -840,7 +840,7 @@ contains i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine d_oacc_vect_all subroutine d_oacc_final_vect_free(x) diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 8c813134..42226f0c 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -13,34 +13,34 @@ module psb_i_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => i_oacc_get_nrows - procedure, nopass :: get_fmt => i_oacc_get_fmt - - procedure, pass(x) :: all => i_oacc_vect_all - procedure, pass(x) :: zero => i_oacc_zero - procedure, pass(x) :: asb_m => i_oacc_asb_m - procedure, pass(x) :: sync => i_oacc_sync - procedure, pass(x) :: sync_space => i_oacc_sync_space - procedure, pass(x) :: bld_x => i_oacc_bld_x - procedure, pass(x) :: bld_mn => i_oacc_bld_mn - procedure, pass(x) :: free => i_oacc_vect_free - procedure, pass(x) :: ins_a => i_oacc_ins_a - procedure, pass(x) :: ins_v => i_oacc_ins_v - procedure, pass(x) :: is_host => i_oacc_is_host - procedure, pass(x) :: is_dev => i_oacc_is_dev - procedure, pass(x) :: is_sync => i_oacc_is_sync - procedure, pass(x) :: set_host => i_oacc_set_host - procedure, pass(x) :: set_dev => i_oacc_set_dev - procedure, pass(x) :: set_sync => i_oacc_set_sync - procedure, pass(x) :: set_scal => i_oacc_set_scal - - procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf - procedure, pass(y) :: sctb => i_oacc_sctb - procedure, pass(y) :: sctb_x => i_oacc_sctb_x - procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf - - procedure, pass(x) :: get_size => i_oacc_get_size + procedure, pass(x) :: get_nrows => i_oacc_get_nrows + procedure, nopass :: get_fmt => i_oacc_get_fmt + + procedure, pass(x) :: all => i_oacc_vect_all + procedure, pass(x) :: zero => i_oacc_zero + procedure, pass(x) :: asb_m => i_oacc_asb_m + procedure, pass(x) :: sync => i_oacc_sync + procedure, pass(x) :: sync_dev_space => i_oacc_sync_dev_space + procedure, pass(x) :: bld_x => i_oacc_bld_x + procedure, pass(x) :: bld_mn => i_oacc_bld_mn + procedure, pass(x) :: free => i_oacc_vect_free + procedure, pass(x) :: ins_a => i_oacc_ins_a + procedure, pass(x) :: ins_v => i_oacc_ins_v + procedure, pass(x) :: is_host => i_oacc_is_host + procedure, pass(x) :: is_dev => i_oacc_is_dev + procedure, pass(x) :: is_sync => i_oacc_is_sync + procedure, pass(x) :: set_host => i_oacc_set_host + procedure, pass(x) :: set_dev => i_oacc_set_dev + procedure, pass(x) :: set_sync => i_oacc_set_sync + procedure, pass(x) :: set_scal => i_oacc_set_scal + + procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf + procedure, pass(y) :: sctb => i_oacc_sctb + procedure, pass(y) :: sctb_x => i_oacc_sctb_x + procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf + + procedure, pass(x) :: get_size => i_oacc_get_size final :: i_oacc_final_vect_free end type psb_i_vect_oacc @@ -63,7 +63,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() @@ -90,7 +90,7 @@ contains class(psb_i_vect_oacc) :: y integer(psb_ipk_) :: info, ni - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -143,7 +143,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -170,7 +170,7 @@ contains info = 0 - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -202,9 +202,9 @@ contains if (psb_errstatus_fatal()) return done_oacc = .false. - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) - select type(vval => val) + select type(vval => val) type is (psb_i_vect_oacc) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() @@ -219,11 +219,11 @@ contains end select if (.not.done_oacc) then - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) if (virl%is_dev()) call virl%sync() end select - select type(vval => val) + select type(vval => val) type is (psb_i_vect_oacc) if (vval%is_dev()) call vval%sync() end select @@ -269,7 +269,7 @@ contains call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine i_oacc_bld_mn @@ -291,7 +291,7 @@ contains end if x%v(:) = this(:) call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine i_oacc_bld_x @@ -366,11 +366,11 @@ contains end function i_oacc_get_fmt - subroutine i_oacc_sync_space(x) + subroutine i_oacc_sync_dev_space(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x if (allocated(x%v)) call acc_create(x%v) - end subroutine i_oacc_sync_space + end subroutine i_oacc_sync_dev_space subroutine i_oacc_sync(x) implicit none @@ -444,7 +444,7 @@ contains i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine i_oacc_vect_all subroutine i_oacc_final_vect_free(x) diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index 9ff100bc..eb9b2b9a 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -15,34 +15,34 @@ module psb_l_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => l_oacc_get_nrows - procedure, nopass :: get_fmt => l_oacc_get_fmt - - procedure, pass(x) :: all => l_oacc_vect_all - procedure, pass(x) :: zero => l_oacc_zero - procedure, pass(x) :: asb_m => l_oacc_asb_m - procedure, pass(x) :: sync => l_oacc_sync - procedure, pass(x) :: sync_space => l_oacc_sync_space - procedure, pass(x) :: bld_x => l_oacc_bld_x - procedure, pass(x) :: bld_mn => l_oacc_bld_mn - procedure, pass(x) :: free => l_oacc_vect_free - procedure, pass(x) :: ins_a => l_oacc_ins_a - procedure, pass(x) :: ins_v => l_oacc_ins_v - procedure, pass(x) :: is_host => l_oacc_is_host - procedure, pass(x) :: is_dev => l_oacc_is_dev - procedure, pass(x) :: is_sync => l_oacc_is_sync - procedure, pass(x) :: set_host => l_oacc_set_host - procedure, pass(x) :: set_dev => l_oacc_set_dev - procedure, pass(x) :: set_sync => l_oacc_set_sync - procedure, pass(x) :: set_scal => l_oacc_set_scal - - procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf - procedure, pass(y) :: sctb => l_oacc_sctb - procedure, pass(y) :: sctb_x => l_oacc_sctb_x - procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf - - procedure, pass(x) :: get_size => l_oacc_get_size + procedure, pass(x) :: get_nrows => l_oacc_get_nrows + procedure, nopass :: get_fmt => l_oacc_get_fmt + + procedure, pass(x) :: all => l_oacc_vect_all + procedure, pass(x) :: zero => l_oacc_zero + procedure, pass(x) :: asb_m => l_oacc_asb_m + procedure, pass(x) :: sync => l_oacc_sync + procedure, pass(x) :: sync_dev_space => l_oacc_sync_dev_space + procedure, pass(x) :: bld_x => l_oacc_bld_x + procedure, pass(x) :: bld_mn => l_oacc_bld_mn + procedure, pass(x) :: free => l_oacc_vect_free + procedure, pass(x) :: ins_a => l_oacc_ins_a + procedure, pass(x) :: ins_v => l_oacc_ins_v + procedure, pass(x) :: is_host => l_oacc_is_host + procedure, pass(x) :: is_dev => l_oacc_is_dev + procedure, pass(x) :: is_sync => l_oacc_is_sync + procedure, pass(x) :: set_host => l_oacc_set_host + procedure, pass(x) :: set_dev => l_oacc_set_dev + procedure, pass(x) :: set_sync => l_oacc_set_sync + procedure, pass(x) :: set_scal => l_oacc_set_scal + + procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf + procedure, pass(y) :: sctb => l_oacc_sctb + procedure, pass(y) :: sctb_x => l_oacc_sctb_x + procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf + + procedure, pass(x) :: get_size => l_oacc_get_size final :: l_oacc_final_vect_free end type psb_l_vect_oacc @@ -65,7 +65,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() @@ -92,7 +92,7 @@ contains class(psb_l_vect_oacc) :: y integer(psb_ipk_) :: info, ni - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -145,7 +145,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -172,7 +172,7 @@ contains info = 0 - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -204,9 +204,9 @@ contains if (psb_errstatus_fatal()) return done_oacc = .false. - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) - select type(vval => val) + select type(vval => val) type is (psb_l_vect_oacc) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() @@ -221,11 +221,11 @@ contains end select if (.not.done_oacc) then - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) if (virl%is_dev()) call virl%sync() end select - select type(vval => val) + select type(vval => val) type is (psb_l_vect_oacc) if (vval%is_dev()) call vval%sync() end select @@ -271,7 +271,7 @@ contains call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine l_oacc_bld_mn @@ -293,7 +293,7 @@ contains end if x%v(:) = this(:) call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine l_oacc_bld_x @@ -368,11 +368,11 @@ contains end function l_oacc_get_fmt - subroutine l_oacc_sync_space(x) + subroutine l_oacc_sync_dev_space(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x if (allocated(x%v)) call acc_create(x%v) - end subroutine l_oacc_sync_space + end subroutine l_oacc_sync_dev_space subroutine l_oacc_sync(x) implicit none @@ -446,7 +446,7 @@ contains i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine l_oacc_vect_all subroutine l_oacc_final_vect_free(x) diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 index b577daac..64a5f9a1 100644 --- a/openacc/psb_s_oacc_csr_mat_mod.F90 +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -13,32 +13,32 @@ module psb_s_oacc_csr_mat_mod type, extends(psb_s_csr_sparse_mat) :: psb_s_oacc_csr_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => s_oacc_csr_get_fmt - procedure, pass(a) :: sizeof => s_oacc_csr_sizeof - procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv - procedure, pass(a) :: scals => psb_s_oacc_csr_scals - procedure, pass(a) :: scalv => psb_s_oacc_csr_scal - procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt - procedure, pass(a) :: free => s_oacc_csr_free - procedure, pass(a) :: mold => psb_s_oacc_csr_mold - procedure, pass(a) :: all => s_oacc_csr_all - procedure, pass(a) :: is_host => s_oacc_csr_is_host - procedure, pass(a) :: is_sync => s_oacc_csr_is_sync - procedure, pass(a) :: is_dev => s_oacc_csr_is_dev - procedure, pass(a) :: set_host => s_oacc_csr_set_host - procedure, pass(a) :: set_sync => s_oacc_csr_set_sync - procedure, pass(a) :: set_dev => s_oacc_csr_set_dev - procedure, pass(a) :: free_space => s_oacc_csr_free_space - procedure, pass(a) :: sync_space => s_oacc_csr_sync_space - procedure, pass(a) :: sync => s_oacc_csr_sync + procedure, nopass :: get_fmt => s_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => s_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv + procedure, pass(a) :: scals => psb_s_oacc_csr_scals + procedure, pass(a) :: scalv => psb_s_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => s_oacc_csr_free + procedure, pass(a) :: mold => psb_s_oacc_csr_mold + procedure, pass(a) :: all => s_oacc_csr_all + procedure, pass(a) :: is_host => s_oacc_csr_is_host + procedure, pass(a) :: is_sync => s_oacc_csr_is_sync + procedure, pass(a) :: is_dev => s_oacc_csr_is_dev + procedure, pass(a) :: set_host => s_oacc_csr_set_host + procedure, pass(a) :: set_sync => s_oacc_csr_set_sync + procedure, pass(a) :: set_dev => s_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => s_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => s_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => s_oacc_csr_sync end type psb_s_oacc_csr_sparse_mat interface @@ -156,7 +156,7 @@ module psb_s_oacc_csr_mat_mod contains - subroutine s_oacc_csr_free_space(a) + subroutine s_oacc_csr_free_dev_space(a) use psb_base_mod implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%irp)) call acc_delete_finalize(a%irp) return - end subroutine s_oacc_csr_free_space + end subroutine s_oacc_csr_free_dev_space subroutine s_oacc_csr_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_s_csr_sparse_mat%free() return @@ -218,7 +218,7 @@ contains allocate(a%ja(nz),stat=info) allocate(a%irp(m+1),stat=info) if (info == 0) call a%set_host() - if (info == 0) call a%sync_space() + if (info == 0) call a%sync_dev_space() end subroutine s_oacc_csr_all function s_oacc_csr_is_host(a) result(res) @@ -266,13 +266,13 @@ contains a%devstate = is_dev end subroutine s_oacc_csr_set_dev - subroutine s_oacc_csr_sync_space(a) + subroutine s_oacc_csr_sync_dev_space(a) implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a if (allocated(a%val)) call acc_create(a%val) if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irp)) call acc_create(a%irp) - end subroutine s_oacc_csr_sync_space + end subroutine s_oacc_csr_sync_dev_space subroutine s_oacc_csr_sync(a) implicit none @@ -280,7 +280,7 @@ contains class(psb_s_oacc_csr_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index 793bf353..2893c3e5 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_s_oacc_ell_mat_mod type, extends(psb_s_ell_sparse_mat) :: psb_s_oacc_ell_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => s_oacc_ell_get_fmt - procedure, pass(a) :: sizeof => s_oacc_ell_sizeof - procedure, pass(a) :: is_host => s_oacc_ell_is_host - procedure, pass(a) :: is_sync => s_oacc_ell_is_sync - procedure, pass(a) :: is_dev => s_oacc_ell_is_dev - procedure, pass(a) :: set_host => s_oacc_ell_set_host - procedure, pass(a) :: set_sync => s_oacc_ell_set_sync - procedure, pass(a) :: set_dev => s_oacc_ell_set_dev - procedure, pass(a) :: sync_space => s_oacc_ell_sync_space - procedure, pass(a) :: sync => s_oacc_ell_sync - procedure, pass(a) :: free_space => s_oacc_ell_free_space - procedure, pass(a) :: free => s_oacc_ell_free - procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv - procedure, pass(a) :: scals => psb_s_oacc_ell_scals - procedure, pass(a) :: scalv => psb_s_oacc_ell_scal - procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt - procedure, pass(a) :: mold => psb_s_oacc_ell_mold + procedure, nopass :: get_fmt => s_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => s_oacc_ell_sizeof + procedure, pass(a) :: is_host => s_oacc_ell_is_host + procedure, pass(a) :: is_sync => s_oacc_ell_is_sync + procedure, pass(a) :: is_dev => s_oacc_ell_is_dev + procedure, pass(a) :: set_host => s_oacc_ell_set_host + procedure, pass(a) :: set_sync => s_oacc_ell_set_sync + procedure, pass(a) :: set_dev => s_oacc_ell_set_dev + procedure, pass(a) :: sync_dev_space => s_oacc_ell_sync_dev_space + procedure, pass(a) :: sync => s_oacc_ell_sync + procedure, pass(a) :: free_dev_space => s_oacc_ell_free_dev_space + procedure, pass(a) :: free => s_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv + procedure, pass(a) :: scals => psb_s_oacc_ell_scals + procedure, pass(a) :: scalv => psb_s_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_ell_mold end type psb_s_oacc_ell_sparse_mat @@ -154,7 +154,7 @@ module psb_s_oacc_ell_mat_mod contains - subroutine s_oacc_ell_free_space(a) + subroutine s_oacc_ell_free_dev_space(a) use psb_base_mod implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a @@ -166,7 +166,7 @@ contains if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) return - end subroutine s_oacc_ell_free_space + end subroutine s_oacc_ell_free_dev_space subroutine s_oacc_ell_free(a) use psb_base_mod @@ -174,7 +174,7 @@ contains class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_s_ell_sparse_mat%free() return @@ -195,7 +195,7 @@ contains end function s_oacc_ell_sizeof - subroutine s_oacc_ell_sync_space(a) + subroutine s_oacc_ell_sync_dev_space(a) implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a @@ -203,7 +203,7 @@ contains if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) - end subroutine s_oacc_ell_sync_space + end subroutine s_oacc_ell_sync_dev_space function s_oacc_ell_is_host(a) result(res) implicit none @@ -262,7 +262,7 @@ contains class(psb_s_oacc_ell_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 index 508591e4..583a281e 100644 --- a/openacc/psb_s_oacc_hll_mat_mod.F90 +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_s_oacc_hll_mat_mod type, extends(psb_s_hll_sparse_mat) :: psb_s_oacc_hll_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => s_oacc_hll_get_fmt - procedure, pass(a) :: sizeof => s_oacc_hll_sizeof - procedure, pass(a) :: is_host => s_oacc_hll_is_host - procedure, pass(a) :: is_sync => s_oacc_hll_is_sync - procedure, pass(a) :: is_dev => s_oacc_hll_is_dev - procedure, pass(a) :: set_host => s_oacc_hll_set_host - procedure, pass(a) :: set_sync => s_oacc_hll_set_sync - procedure, pass(a) :: set_dev => s_oacc_hll_set_dev - procedure, pass(a) :: sync_space => s_oacc_hll_sync_space - procedure, pass(a) :: sync => s_oacc_hll_sync - procedure, pass(a) :: free_space => s_oacc_hll_free_space - procedure, pass(a) :: free => s_oacc_hll_free - procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv - procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv - procedure, pass(a) :: scals => psb_s_oacc_hll_scals - procedure, pass(a) :: scalv => psb_s_oacc_hll_scal - procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt - procedure, pass(a) :: mold => psb_s_oacc_hll_mold + procedure, nopass :: get_fmt => s_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => s_oacc_hll_sizeof + procedure, pass(a) :: is_host => s_oacc_hll_is_host + procedure, pass(a) :: is_sync => s_oacc_hll_is_sync + procedure, pass(a) :: is_dev => s_oacc_hll_is_dev + procedure, pass(a) :: set_host => s_oacc_hll_set_host + procedure, pass(a) :: set_sync => s_oacc_hll_set_sync + procedure, pass(a) :: set_dev => s_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => s_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => s_oacc_hll_sync + procedure, pass(a) :: free_dev_space => s_oacc_hll_free_dev_space + procedure, pass(a) :: free => s_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv + procedure, pass(a) :: scals => psb_s_oacc_hll_scals + procedure, pass(a) :: scalv => psb_s_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_s_oacc_hll_mold end type psb_s_oacc_hll_sparse_mat @@ -154,7 +154,7 @@ module psb_s_oacc_hll_mat_mod contains - subroutine s_oacc_hll_free_space(a) + subroutine s_oacc_hll_free_dev_space(a) use psb_base_mod implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) return - end subroutine s_oacc_hll_free_space + end subroutine s_oacc_hll_free_dev_space subroutine s_oacc_hll_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_s_hll_sparse_mat%free() return @@ -249,7 +249,7 @@ contains res = 'HLLOA' end function s_oacc_hll_get_fmt - subroutine s_oacc_hll_sync_space(a) + subroutine s_oacc_hll_sync_dev_space(a) implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a @@ -258,7 +258,7 @@ contains if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) if (allocated(a%hkoffs)) call acc_create(a%hkoffs) - end subroutine s_oacc_hll_sync_space + end subroutine s_oacc_hll_sync_dev_space subroutine s_oacc_hll_sync(a) @@ -267,7 +267,7 @@ contains class(psb_s_oacc_hll_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 7ce4292f..16b45461 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -15,50 +15,50 @@ module psb_s_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => s_oacc_get_nrows - procedure, nopass :: get_fmt => s_oacc_get_fmt - - procedure, pass(x) :: all => s_oacc_vect_all - procedure, pass(x) :: zero => s_oacc_zero - procedure, pass(x) :: asb_m => s_oacc_asb_m - procedure, pass(x) :: sync => s_oacc_sync - procedure, pass(x) :: sync_space => s_oacc_sync_space - procedure, pass(x) :: bld_x => s_oacc_bld_x - procedure, pass(x) :: bld_mn => s_oacc_bld_mn - procedure, pass(x) :: free => s_oacc_vect_free - procedure, pass(x) :: ins_a => s_oacc_ins_a - procedure, pass(x) :: ins_v => s_oacc_ins_v - procedure, pass(x) :: is_host => s_oacc_is_host - procedure, pass(x) :: is_dev => s_oacc_is_dev - procedure, pass(x) :: is_sync => s_oacc_is_sync - procedure, pass(x) :: set_host => s_oacc_set_host - procedure, pass(x) :: set_dev => s_oacc_set_dev - procedure, pass(x) :: set_sync => s_oacc_set_sync - procedure, pass(x) :: set_scal => s_oacc_set_scal - - procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf - procedure, pass(y) :: sctb => s_oacc_sctb - procedure, pass(y) :: sctb_x => s_oacc_sctb_x - procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf - - procedure, pass(x) :: get_size => s_oacc_get_size - - procedure, pass(x) :: dot_v => s_oacc_vect_dot - procedure, pass(x) :: dot_a => s_oacc_dot_a - procedure, pass(y) :: axpby_v => s_oacc_axpby_v - procedure, pass(y) :: axpby_a => s_oacc_axpby_a - procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz - procedure, pass(y) :: mlt_a => s_oacc_mlt_a - procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2 - procedure, pass(x) :: scal => s_oacc_scal - procedure, pass(x) :: nrm2 => s_oacc_nrm2 - procedure, pass(x) :: amax => s_oacc_amax - procedure, pass(x) :: asum => s_oacc_asum - procedure, pass(x) :: absval1 => s_oacc_absval1 - procedure, pass(x) :: absval2 => s_oacc_absval2 + procedure, pass(x) :: get_nrows => s_oacc_get_nrows + procedure, nopass :: get_fmt => s_oacc_get_fmt + + procedure, pass(x) :: all => s_oacc_vect_all + procedure, pass(x) :: zero => s_oacc_zero + procedure, pass(x) :: asb_m => s_oacc_asb_m + procedure, pass(x) :: sync => s_oacc_sync + procedure, pass(x) :: sync_dev_space => s_oacc_sync_dev_space + procedure, pass(x) :: bld_x => s_oacc_bld_x + procedure, pass(x) :: bld_mn => s_oacc_bld_mn + procedure, pass(x) :: free => s_oacc_vect_free + procedure, pass(x) :: ins_a => s_oacc_ins_a + procedure, pass(x) :: ins_v => s_oacc_ins_v + procedure, pass(x) :: is_host => s_oacc_is_host + procedure, pass(x) :: is_dev => s_oacc_is_dev + procedure, pass(x) :: is_sync => s_oacc_is_sync + procedure, pass(x) :: set_host => s_oacc_set_host + procedure, pass(x) :: set_dev => s_oacc_set_dev + procedure, pass(x) :: set_sync => s_oacc_set_sync + procedure, pass(x) :: set_scal => s_oacc_set_scal + + procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf + procedure, pass(y) :: sctb => s_oacc_sctb + procedure, pass(y) :: sctb_x => s_oacc_sctb_x + procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf + + procedure, pass(x) :: get_size => s_oacc_get_size + + procedure, pass(x) :: dot_v => s_oacc_vect_dot + procedure, pass(x) :: dot_a => s_oacc_dot_a + procedure, pass(y) :: axpby_v => s_oacc_axpby_v + procedure, pass(y) :: axpby_a => s_oacc_axpby_a + procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz + procedure, pass(y) :: mlt_a => s_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2 + procedure, pass(x) :: scal => s_oacc_scal + procedure, pass(x) :: nrm2 => s_oacc_nrm2 + procedure, pass(x) :: amax => s_oacc_amax + procedure, pass(x) :: asum => s_oacc_asum + procedure, pass(x) :: absval1 => s_oacc_absval1 + procedure, pass(x) :: absval2 => s_oacc_absval2 final :: s_oacc_final_vect_free end type psb_s_vect_oacc @@ -117,7 +117,7 @@ contains integer(psb_ipk_) :: i n = min(size(x%v), size(y%v)) - select type (yy => y) + select type (yy => y) class is (psb_s_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() @@ -287,7 +287,7 @@ contains info = psb_success_ - select type(xx => x) + select type(xx => x) type is (psb_s_vect_oacc) if ((beta /= szero) .and. y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -354,11 +354,11 @@ contains info = psb_success_ gpu_done = .false. - select type(xx => x) + select type(xx => x) class is (psb_s_vect_oacc) - select type(yy => y) + select type(yy => y) class is (psb_s_vect_oacc) - select type(zz => z) + select type(zz => z) class is (psb_s_vect_oacc) if ((beta /= szero) .and. yy%is_host()) call yy%sync() if ((delta /= szero) .and. zz%is_host()) call zz%sync() @@ -405,7 +405,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() @@ -432,7 +432,7 @@ contains class(psb_s_vect_oacc) :: y integer(psb_ipk_) :: info, ni - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -485,7 +485,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -512,7 +512,7 @@ contains info = 0 - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -544,9 +544,9 @@ contains if (psb_errstatus_fatal()) return done_oacc = .false. - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) - select type(vval => val) + select type(vval => val) type is (psb_s_vect_oacc) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() @@ -561,11 +561,11 @@ contains end select if (.not.done_oacc) then - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) if (virl%is_dev()) call virl%sync() end select - select type(vval => val) + select type(vval => val) type is (psb_s_vect_oacc) if (vval%is_dev()) call vval%sync() end select @@ -611,7 +611,7 @@ contains call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine s_oacc_bld_mn @@ -633,7 +633,7 @@ contains end if x%v(:) = this(:) call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine s_oacc_bld_x @@ -719,7 +719,7 @@ contains res = szero !write(0,*) 'dot_v' - select type(yy => y) + select type(yy => y) type is (psb_s_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n, x%v, 1, yy%v, 1) @@ -762,11 +762,11 @@ contains end function s_oacc_dot_a - subroutine s_oacc_sync_space(x) + subroutine s_oacc_sync_dev_space(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x if (allocated(x%v)) call acc_create(x%v) - end subroutine s_oacc_sync_space + end subroutine s_oacc_sync_dev_space subroutine s_oacc_sync(x) implicit none @@ -840,7 +840,7 @@ contains i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine s_oacc_vect_all subroutine s_oacc_final_vect_free(x) diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 index dbafb391..47973825 100644 --- a/openacc/psb_z_oacc_csr_mat_mod.F90 +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -13,32 +13,32 @@ module psb_z_oacc_csr_mat_mod type, extends(psb_z_csr_sparse_mat) :: psb_z_oacc_csr_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => z_oacc_csr_get_fmt - procedure, pass(a) :: sizeof => z_oacc_csr_sizeof - procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv - procedure, pass(a) :: scals => psb_z_oacc_csr_scals - procedure, pass(a) :: scalv => psb_z_oacc_csr_scal - procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt - procedure, pass(a) :: free => z_oacc_csr_free - procedure, pass(a) :: mold => psb_z_oacc_csr_mold - procedure, pass(a) :: all => z_oacc_csr_all - procedure, pass(a) :: is_host => z_oacc_csr_is_host - procedure, pass(a) :: is_sync => z_oacc_csr_is_sync - procedure, pass(a) :: is_dev => z_oacc_csr_is_dev - procedure, pass(a) :: set_host => z_oacc_csr_set_host - procedure, pass(a) :: set_sync => z_oacc_csr_set_sync - procedure, pass(a) :: set_dev => z_oacc_csr_set_dev - procedure, pass(a) :: free_space => z_oacc_csr_free_space - procedure, pass(a) :: sync_space => z_oacc_csr_sync_space - procedure, pass(a) :: sync => z_oacc_csr_sync + procedure, nopass :: get_fmt => z_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => z_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv + procedure, pass(a) :: scals => psb_z_oacc_csr_scals + procedure, pass(a) :: scalv => psb_z_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => z_oacc_csr_free + procedure, pass(a) :: mold => psb_z_oacc_csr_mold + procedure, pass(a) :: all => z_oacc_csr_all + procedure, pass(a) :: is_host => z_oacc_csr_is_host + procedure, pass(a) :: is_sync => z_oacc_csr_is_sync + procedure, pass(a) :: is_dev => z_oacc_csr_is_dev + procedure, pass(a) :: set_host => z_oacc_csr_set_host + procedure, pass(a) :: set_sync => z_oacc_csr_set_sync + procedure, pass(a) :: set_dev => z_oacc_csr_set_dev + procedure, pass(a) :: free_dev_space => z_oacc_csr_free_dev_space + procedure, pass(a) :: sync_dev_space => z_oacc_csr_sync_dev_space + procedure, pass(a) :: sync => z_oacc_csr_sync end type psb_z_oacc_csr_sparse_mat interface @@ -156,7 +156,7 @@ module psb_z_oacc_csr_mat_mod contains - subroutine z_oacc_csr_free_space(a) + subroutine z_oacc_csr_free_dev_space(a) use psb_base_mod implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%irp)) call acc_delete_finalize(a%irp) return - end subroutine z_oacc_csr_free_space + end subroutine z_oacc_csr_free_dev_space subroutine z_oacc_csr_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_z_csr_sparse_mat%free() return @@ -218,7 +218,7 @@ contains allocate(a%ja(nz),stat=info) allocate(a%irp(m+1),stat=info) if (info == 0) call a%set_host() - if (info == 0) call a%sync_space() + if (info == 0) call a%sync_dev_space() end subroutine z_oacc_csr_all function z_oacc_csr_is_host(a) result(res) @@ -266,13 +266,13 @@ contains a%devstate = is_dev end subroutine z_oacc_csr_set_dev - subroutine z_oacc_csr_sync_space(a) + subroutine z_oacc_csr_sync_dev_space(a) implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a if (allocated(a%val)) call acc_create(a%val) if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irp)) call acc_create(a%irp) - end subroutine z_oacc_csr_sync_space + end subroutine z_oacc_csr_sync_dev_space subroutine z_oacc_csr_sync(a) implicit none @@ -280,7 +280,7 @@ contains class(psb_z_oacc_csr_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index 76b0182d..4013ee9d 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_z_oacc_ell_mat_mod type, extends(psb_z_ell_sparse_mat) :: psb_z_oacc_ell_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => z_oacc_ell_get_fmt - procedure, pass(a) :: sizeof => z_oacc_ell_sizeof - procedure, pass(a) :: is_host => z_oacc_ell_is_host - procedure, pass(a) :: is_sync => z_oacc_ell_is_sync - procedure, pass(a) :: is_dev => z_oacc_ell_is_dev - procedure, pass(a) :: set_host => z_oacc_ell_set_host - procedure, pass(a) :: set_sync => z_oacc_ell_set_sync - procedure, pass(a) :: set_dev => z_oacc_ell_set_dev - procedure, pass(a) :: sync_space => z_oacc_ell_sync_space - procedure, pass(a) :: sync => z_oacc_ell_sync - procedure, pass(a) :: free_space => z_oacc_ell_free_space - procedure, pass(a) :: free => z_oacc_ell_free - procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv - procedure, pass(a) :: scals => psb_z_oacc_ell_scals - procedure, pass(a) :: scalv => psb_z_oacc_ell_scal - procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt - procedure, pass(a) :: mold => psb_z_oacc_ell_mold + procedure, nopass :: get_fmt => z_oacc_ell_get_fmt + procedure, pass(a) :: sizeof => z_oacc_ell_sizeof + procedure, pass(a) :: is_host => z_oacc_ell_is_host + procedure, pass(a) :: is_sync => z_oacc_ell_is_sync + procedure, pass(a) :: is_dev => z_oacc_ell_is_dev + procedure, pass(a) :: set_host => z_oacc_ell_set_host + procedure, pass(a) :: set_sync => z_oacc_ell_set_sync + procedure, pass(a) :: set_dev => z_oacc_ell_set_dev + procedure, pass(a) :: sync_dev_space => z_oacc_ell_sync_dev_space + procedure, pass(a) :: sync => z_oacc_ell_sync + procedure, pass(a) :: free_dev_space => z_oacc_ell_free_dev_space + procedure, pass(a) :: free => z_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv + procedure, pass(a) :: scals => psb_z_oacc_ell_scals + procedure, pass(a) :: scalv => psb_z_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_ell_mold end type psb_z_oacc_ell_sparse_mat @@ -154,7 +154,7 @@ module psb_z_oacc_ell_mat_mod contains - subroutine z_oacc_ell_free_space(a) + subroutine z_oacc_ell_free_dev_space(a) use psb_base_mod implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a @@ -166,7 +166,7 @@ contains if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) return - end subroutine z_oacc_ell_free_space + end subroutine z_oacc_ell_free_dev_space subroutine z_oacc_ell_free(a) use psb_base_mod @@ -174,7 +174,7 @@ contains class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_z_ell_sparse_mat%free() return @@ -195,7 +195,7 @@ contains end function z_oacc_ell_sizeof - subroutine z_oacc_ell_sync_space(a) + subroutine z_oacc_ell_sync_dev_space(a) implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a @@ -203,7 +203,7 @@ contains if (allocated(a%ja)) call acc_create(a%ja) if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) - end subroutine z_oacc_ell_sync_space + end subroutine z_oacc_ell_sync_dev_space function z_oacc_ell_is_host(a) result(res) implicit none @@ -262,7 +262,7 @@ contains class(psb_z_oacc_ell_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 index 4a657fd1..289b78ac 100644 --- a/openacc/psb_z_oacc_hll_mat_mod.F90 +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -12,31 +12,31 @@ module psb_z_oacc_hll_mat_mod type, extends(psb_z_hll_sparse_mat) :: psb_z_oacc_hll_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, nopass :: get_fmt => z_oacc_hll_get_fmt - procedure, pass(a) :: sizeof => z_oacc_hll_sizeof - procedure, pass(a) :: is_host => z_oacc_hll_is_host - procedure, pass(a) :: is_sync => z_oacc_hll_is_sync - procedure, pass(a) :: is_dev => z_oacc_hll_is_dev - procedure, pass(a) :: set_host => z_oacc_hll_set_host - procedure, pass(a) :: set_sync => z_oacc_hll_set_sync - procedure, pass(a) :: set_dev => z_oacc_hll_set_dev - procedure, pass(a) :: sync_space => z_oacc_hll_sync_space - procedure, pass(a) :: sync => z_oacc_hll_sync - procedure, pass(a) :: free_space => z_oacc_hll_free_space - procedure, pass(a) :: free => z_oacc_hll_free - procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv - procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv - procedure, pass(a) :: scals => psb_z_oacc_hll_scals - procedure, pass(a) :: scalv => psb_z_oacc_hll_scal - procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt - procedure, pass(a) :: mold => psb_z_oacc_hll_mold + procedure, nopass :: get_fmt => z_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => z_oacc_hll_sizeof + procedure, pass(a) :: is_host => z_oacc_hll_is_host + procedure, pass(a) :: is_sync => z_oacc_hll_is_sync + procedure, pass(a) :: is_dev => z_oacc_hll_is_dev + procedure, pass(a) :: set_host => z_oacc_hll_set_host + procedure, pass(a) :: set_sync => z_oacc_hll_set_sync + procedure, pass(a) :: set_dev => z_oacc_hll_set_dev + procedure, pass(a) :: sync_dev_space => z_oacc_hll_sync_dev_space + procedure, pass(a) :: sync => z_oacc_hll_sync + procedure, pass(a) :: free_dev_space => z_oacc_hll_free_dev_space + procedure, pass(a) :: free => z_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv + procedure, pass(a) :: scals => psb_z_oacc_hll_scals + procedure, pass(a) :: scalv => psb_z_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_z_oacc_hll_mold end type psb_z_oacc_hll_sparse_mat @@ -154,7 +154,7 @@ module psb_z_oacc_hll_mat_mod contains - subroutine z_oacc_hll_free_space(a) + subroutine z_oacc_hll_free_dev_space(a) use psb_base_mod implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a @@ -167,7 +167,7 @@ contains if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) return - end subroutine z_oacc_hll_free_space + end subroutine z_oacc_hll_free_dev_space subroutine z_oacc_hll_free(a) use psb_base_mod @@ -175,7 +175,7 @@ contains class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - call a%free_space() + call a%free_dev_space() call a%psb_z_hll_sparse_mat%free() return @@ -249,7 +249,7 @@ contains res = 'HLLOA' end function z_oacc_hll_get_fmt - subroutine z_oacc_hll_sync_space(a) + subroutine z_oacc_hll_sync_dev_space(a) implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a @@ -258,7 +258,7 @@ contains if (allocated(a%irn)) call acc_create(a%irn) if (allocated(a%idiag)) call acc_create(a%idiag) if (allocated(a%hkoffs)) call acc_create(a%hkoffs) - end subroutine z_oacc_hll_sync_space + end subroutine z_oacc_hll_sync_dev_space subroutine z_oacc_hll_sync(a) @@ -267,7 +267,7 @@ contains class(psb_z_oacc_hll_sparse_mat), pointer :: tmpa integer(psb_ipk_) :: info - tmpa => a + tmpa => a if (a%is_dev()) then call acc_update_self(a%val) call acc_update_self(a%ja) diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 5d6f07be..9e6bbb2d 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -15,50 +15,50 @@ module psb_z_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => z_oacc_get_nrows - procedure, nopass :: get_fmt => z_oacc_get_fmt - - procedure, pass(x) :: all => z_oacc_vect_all - procedure, pass(x) :: zero => z_oacc_zero - procedure, pass(x) :: asb_m => z_oacc_asb_m - procedure, pass(x) :: sync => z_oacc_sync - procedure, pass(x) :: sync_space => z_oacc_sync_space - procedure, pass(x) :: bld_x => z_oacc_bld_x - procedure, pass(x) :: bld_mn => z_oacc_bld_mn - procedure, pass(x) :: free => z_oacc_vect_free - procedure, pass(x) :: ins_a => z_oacc_ins_a - procedure, pass(x) :: ins_v => z_oacc_ins_v - procedure, pass(x) :: is_host => z_oacc_is_host - procedure, pass(x) :: is_dev => z_oacc_is_dev - procedure, pass(x) :: is_sync => z_oacc_is_sync - procedure, pass(x) :: set_host => z_oacc_set_host - procedure, pass(x) :: set_dev => z_oacc_set_dev - procedure, pass(x) :: set_sync => z_oacc_set_sync - procedure, pass(x) :: set_scal => z_oacc_set_scal - - procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf - procedure, pass(y) :: sctb => z_oacc_sctb - procedure, pass(y) :: sctb_x => z_oacc_sctb_x - procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf - - procedure, pass(x) :: get_size => z_oacc_get_size - - procedure, pass(x) :: dot_v => z_oacc_vect_dot - procedure, pass(x) :: dot_a => z_oacc_dot_a - procedure, pass(y) :: axpby_v => z_oacc_axpby_v - procedure, pass(y) :: axpby_a => z_oacc_axpby_a - procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz - procedure, pass(y) :: mlt_a => z_oacc_mlt_a - procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2 - procedure, pass(x) :: scal => z_oacc_scal - procedure, pass(x) :: nrm2 => z_oacc_nrm2 - procedure, pass(x) :: amax => z_oacc_amax - procedure, pass(x) :: asum => z_oacc_asum - procedure, pass(x) :: absval1 => z_oacc_absval1 - procedure, pass(x) :: absval2 => z_oacc_absval2 + procedure, pass(x) :: get_nrows => z_oacc_get_nrows + procedure, nopass :: get_fmt => z_oacc_get_fmt + + procedure, pass(x) :: all => z_oacc_vect_all + procedure, pass(x) :: zero => z_oacc_zero + procedure, pass(x) :: asb_m => z_oacc_asb_m + procedure, pass(x) :: sync => z_oacc_sync + procedure, pass(x) :: sync_dev_space => z_oacc_sync_dev_space + procedure, pass(x) :: bld_x => z_oacc_bld_x + procedure, pass(x) :: bld_mn => z_oacc_bld_mn + procedure, pass(x) :: free => z_oacc_vect_free + procedure, pass(x) :: ins_a => z_oacc_ins_a + procedure, pass(x) :: ins_v => z_oacc_ins_v + procedure, pass(x) :: is_host => z_oacc_is_host + procedure, pass(x) :: is_dev => z_oacc_is_dev + procedure, pass(x) :: is_sync => z_oacc_is_sync + procedure, pass(x) :: set_host => z_oacc_set_host + procedure, pass(x) :: set_dev => z_oacc_set_dev + procedure, pass(x) :: set_sync => z_oacc_set_sync + procedure, pass(x) :: set_scal => z_oacc_set_scal + + procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x + procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf + procedure, pass(y) :: sctb => z_oacc_sctb + procedure, pass(y) :: sctb_x => z_oacc_sctb_x + procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf + + procedure, pass(x) :: get_size => z_oacc_get_size + + procedure, pass(x) :: dot_v => z_oacc_vect_dot + procedure, pass(x) :: dot_a => z_oacc_dot_a + procedure, pass(y) :: axpby_v => z_oacc_axpby_v + procedure, pass(y) :: axpby_a => z_oacc_axpby_a + procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz + procedure, pass(y) :: mlt_a => z_oacc_mlt_a + procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2 + procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2 + procedure, pass(x) :: scal => z_oacc_scal + procedure, pass(x) :: nrm2 => z_oacc_nrm2 + procedure, pass(x) :: amax => z_oacc_amax + procedure, pass(x) :: asum => z_oacc_asum + procedure, pass(x) :: absval1 => z_oacc_absval1 + procedure, pass(x) :: absval2 => z_oacc_absval2 final :: z_oacc_final_vect_free end type psb_z_vect_oacc @@ -117,7 +117,7 @@ contains integer(psb_ipk_) :: i n = min(size(x%v), size(y%v)) - select type (yy => y) + select type (yy => y) class is (psb_z_vect_oacc) if (x%is_host()) call x%sync() if (yy%is_host()) call yy%sync() @@ -287,7 +287,7 @@ contains info = psb_success_ - select type(xx => x) + select type(xx => x) type is (psb_z_vect_oacc) if ((beta /= zzero) .and. y%is_host()) call y%sync() if (xx%is_host()) call xx%sync() @@ -354,11 +354,11 @@ contains info = psb_success_ gpu_done = .false. - select type(xx => x) + select type(xx => x) class is (psb_z_vect_oacc) - select type(yy => y) + select type(yy => y) class is (psb_z_vect_oacc) - select type(zz => z) + select type(zz => z) class is (psb_z_vect_oacc) if ((beta /= zzero) .and. yy%is_host()) call yy%sync() if ((delta /= zzero) .and. zz%is_host()) call zz%sync() @@ -405,7 +405,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() @@ -432,7 +432,7 @@ contains class(psb_z_vect_oacc) :: y integer(psb_ipk_) :: info, ni - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -485,7 +485,7 @@ contains return end if - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -512,7 +512,7 @@ contains info = 0 - select type(ii => idx) + select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() class default @@ -544,9 +544,9 @@ contains if (psb_errstatus_fatal()) return done_oacc = .false. - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) - select type(vval => val) + select type(vval => val) type is (psb_z_vect_oacc) if (vval%is_host()) call vval%sync() if (virl%is_host()) call virl%sync() @@ -561,11 +561,11 @@ contains end select if (.not.done_oacc) then - select type(virl => irl) + select type(virl => irl) type is (psb_i_vect_oacc) if (virl%is_dev()) call virl%sync() end select - select type(vval => val) + select type(vval => val) type is (psb_z_vect_oacc) if (vval%is_dev()) call vval%sync() end select @@ -611,7 +611,7 @@ contains call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine z_oacc_bld_mn @@ -633,7 +633,7 @@ contains end if x%v(:) = this(:) call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine z_oacc_bld_x @@ -719,7 +719,7 @@ contains res = zzero !write(0,*) 'dot_v' - select type(yy => y) + select type(yy => y) type is (psb_z_base_vect_type) if (x%is_dev()) call x%sync() res = ddot(n, x%v, 1, yy%v, 1) @@ -762,11 +762,11 @@ contains end function z_oacc_dot_a - subroutine z_oacc_sync_space(x) + subroutine z_oacc_sync_dev_space(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x if (allocated(x%v)) call acc_create(x%v) - end subroutine z_oacc_sync_space + end subroutine z_oacc_sync_dev_space subroutine z_oacc_sync(x) implicit none @@ -840,7 +840,7 @@ contains i_err=(/n, n, n, n, n/)) end if call x%set_host() - call x%sync_space() + call x%sync_dev_space() end subroutine z_oacc_vect_all subroutine z_oacc_final_vect_free(x) From f783478df3a7d5398515cc30c52bef42743f9296 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 30 Aug 2024 16:03:04 +0200 Subject: [PATCH 35/39] Merge updates from V4 --- openacc/impl/Makefile | 136 +++++++++++++++++++----- openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 37 +++++-- openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 41 +++++-- openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 37 +++++-- openacc/impl/psb_c_oacc_mlt_v_2.f90 | 37 ++----- openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 37 +++++-- openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 41 +++++-- openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 37 +++++-- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 37 ++----- openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 37 +++++-- openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 41 +++++-- openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 37 +++++-- openacc/impl/psb_s_oacc_mlt_v_2.f90 | 37 ++----- openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 37 +++++-- openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 41 +++++-- openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 37 +++++-- openacc/impl/psb_z_oacc_mlt_v_2.f90 | 37 ++----- openacc/psb_c_oacc_csr_mat_mod.F90 | 23 ---- openacc/psb_c_oacc_ell_mat_mod.F90 | 22 ---- openacc/psb_c_oacc_hll_mat_mod.F90 | 22 ---- openacc/psb_d_oacc_csr_mat_mod.F90 | 23 ---- openacc/psb_d_oacc_ell_mat_mod.F90 | 22 ---- openacc/psb_d_oacc_hll_mat_mod.F90 | 22 ---- openacc/psb_s_oacc_csr_mat_mod.F90 | 23 ---- openacc/psb_s_oacc_ell_mat_mod.F90 | 22 ---- openacc/psb_s_oacc_hll_mat_mod.F90 | 22 ---- openacc/psb_z_oacc_csr_mat_mod.F90 | 23 ---- openacc/psb_z_oacc_ell_mat_mod.F90 | 22 ---- openacc/psb_z_oacc_hll_mat_mod.F90 | 22 ---- 29 files changed, 512 insertions(+), 500 deletions(-) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 244f23b1..e460598b 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -13,8 +13,6 @@ LIBNAME=libpsb_openacc.a OBJS= \ psb_s_oacc_csr_vect_mv.o \ psb_s_oacc_csr_inner_vect_sv.o \ -psb_s_oacc_csr_csmm.o \ -psb_s_oacc_csr_csmv.o \ psb_s_oacc_csr_scals.o \ psb_s_oacc_csr_scal.o \ psb_s_oacc_csr_allocate_mnnz.o \ @@ -28,8 +26,6 @@ psb_s_oacc_mlt_v_2.o \ psb_s_oacc_mlt_v.o \ psb_d_oacc_csr_vect_mv.o \ psb_d_oacc_csr_inner_vect_sv.o \ -psb_d_oacc_csr_csmm.o \ -psb_d_oacc_csr_csmv.o \ psb_d_oacc_csr_scals.o \ psb_d_oacc_csr_scal.o \ psb_d_oacc_csr_allocate_mnnz.o \ @@ -43,8 +39,6 @@ psb_d_oacc_mlt_v_2.o \ psb_d_oacc_mlt_v.o \ psb_c_oacc_csr_vect_mv.o \ psb_c_oacc_csr_inner_vect_sv.o \ -psb_c_oacc_csr_csmm.o \ -psb_c_oacc_csr_csmv.o \ psb_c_oacc_csr_scals.o \ psb_c_oacc_csr_scal.o \ psb_c_oacc_csr_allocate_mnnz.o \ @@ -58,8 +52,6 @@ psb_c_oacc_mlt_v_2.o \ psb_c_oacc_mlt_v.o \ psb_z_oacc_csr_vect_mv.o \ psb_z_oacc_csr_inner_vect_sv.o \ -psb_z_oacc_csr_csmm.o \ -psb_z_oacc_csr_csmv.o \ psb_z_oacc_csr_scals.o \ psb_z_oacc_csr_scal.o \ psb_z_oacc_csr_allocate_mnnz.o \ @@ -73,8 +65,6 @@ psb_z_oacc_mlt_v_2.o \ psb_z_oacc_mlt_v.o \ psb_s_oacc_ell_vect_mv.o \ psb_s_oacc_ell_inner_vect_sv.o \ -psb_s_oacc_ell_csmm.o \ -psb_s_oacc_ell_csmv.o \ psb_s_oacc_ell_scals.o \ psb_s_oacc_ell_scal.o \ psb_s_oacc_ell_reallocate_nz.o \ @@ -93,14 +83,10 @@ psb_s_oacc_hll_allocate_mnnz.o \ psb_s_oacc_hll_reallocate_nz.o \ psb_s_oacc_hll_scal.o \ psb_s_oacc_hll_scals.o \ -psb_s_oacc_hll_csmv.o \ -psb_s_oacc_hll_csmm.o \ psb_s_oacc_hll_inner_vect_sv.o \ psb_s_oacc_hll_vect_mv.o \ psb_d_oacc_ell_vect_mv.o \ psb_d_oacc_ell_inner_vect_sv.o \ -psb_d_oacc_ell_csmm.o \ -psb_d_oacc_ell_csmv.o \ psb_d_oacc_ell_scals.o \ psb_d_oacc_ell_scal.o \ psb_d_oacc_ell_reallocate_nz.o \ @@ -119,14 +105,10 @@ psb_d_oacc_hll_allocate_mnnz.o \ psb_d_oacc_hll_reallocate_nz.o \ psb_d_oacc_hll_scal.o \ psb_d_oacc_hll_scals.o \ -psb_d_oacc_hll_csmv.o \ -psb_d_oacc_hll_csmm.o \ psb_d_oacc_hll_inner_vect_sv.o \ psb_d_oacc_hll_vect_mv.o \ psb_c_oacc_ell_vect_mv.o \ psb_c_oacc_ell_inner_vect_sv.o \ -psb_c_oacc_ell_csmm.o \ -psb_c_oacc_ell_csmv.o \ psb_c_oacc_ell_scals.o \ psb_c_oacc_ell_scal.o \ psb_c_oacc_ell_reallocate_nz.o \ @@ -145,14 +127,10 @@ psb_c_oacc_hll_allocate_mnnz.o \ psb_c_oacc_hll_reallocate_nz.o \ psb_c_oacc_hll_scal.o \ psb_c_oacc_hll_scals.o \ -psb_c_oacc_hll_csmv.o \ -psb_c_oacc_hll_csmm.o \ psb_c_oacc_hll_inner_vect_sv.o \ psb_c_oacc_hll_vect_mv.o \ psb_z_oacc_ell_vect_mv.o \ psb_z_oacc_ell_inner_vect_sv.o \ -psb_z_oacc_ell_csmm.o \ -psb_z_oacc_ell_csmv.o \ psb_z_oacc_ell_scals.o \ psb_z_oacc_ell_scal.o \ psb_z_oacc_ell_reallocate_nz.o \ @@ -171,25 +149,127 @@ psb_z_oacc_hll_allocate_mnnz.o \ psb_z_oacc_hll_reallocate_nz.o \ psb_z_oacc_hll_scal.o \ psb_z_oacc_hll_scals.o \ -psb_z_oacc_hll_csmv.o \ -psb_z_oacc_hll_csmm.o \ +psb_z_oacc_hll_inner_vect_sv.o \ +psb_z_oacc_hll_vect_mv.o \ +psb_z_oacc_ell_vect_mv.o \ +psb_z_oacc_ell_inner_vect_sv.o \ +psb_z_oacc_ell_scals.o \ +psb_z_oacc_ell_scal.o \ +psb_z_oacc_ell_reallocate_nz.o \ +psb_z_oacc_ell_allocate_mnnz.o \ +psb_z_oacc_ell_cp_from_coo.o \ +psb_z_oacc_ell_cp_from_fmt.o \ +psb_z_oacc_ell_mv_from_coo.o \ +psb_z_oacc_ell_mv_from_fmt.o \ +psb_z_oacc_ell_mold.o \ +psb_z_oacc_hll_mold.o \ +psb_z_oacc_hll_mv_from_fmt.o \ +psb_z_oacc_hll_mv_from_coo.o \ +psb_z_oacc_hll_cp_from_fmt.o \ +psb_z_oacc_hll_cp_from_coo.o \ +psb_z_oacc_hll_allocate_mnnz.o \ +psb_z_oacc_hll_reallocate_nz.o \ +psb_z_oacc_hll_scal.o \ +psb_z_oacc_hll_scals.o \ psb_z_oacc_hll_inner_vect_sv.o \ psb_z_oacc_hll_vect_mv.o + objs: $(OBJS) lib: objs ar cur ../$(LIBNAME) $(OBJS) +psb_s_oacc_csr_vect_mv.o psb_s_oacc_csr_inner_vect_sv.o \ + psb_s_oacc_csr_scals.o \ + psb_s_oacc_csr_scal.o psb_s_oacc_csr_allocate_mnnz.o \ + psb_s_oacc_csr_reallocate_nz.o psb_s_oacc_csr_cp_from_coo.o \ + psb_s_oacc_csr_cp_from_fmt.o psb_s_oacc_csr_mv_from_coo.o \ + psb_s_oacc_csr_mv_from_fmt.o psb_s_oacc_csr_mold.o: $(UP)/psb_s_oacc_csr_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o -#psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ - psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ +psb_s_oacc_ell_vect_mv.o psb_s_oacc_ell_inner_vect_sv.o \ + psb_s_oacc_ell_scals.o \ + psb_s_oacc_ell_scal.o psb_s_oacc_ell_allocate_mnnz.o \ + psb_s_oacc_ell_reallocate_nz.o psb_s_oacc_ell_cp_from_coo.o \ + psb_s_oacc_ell_cp_from_fmt.o psb_s_oacc_ell_mv_from_coo.o \ + psb_s_oacc_ell_mv_from_fmt.o psb_s_oacc_ell_mold.o: $(UP)/psb_s_oacc_ell_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o + +psb_s_oacc_hll_vect_mv.o psb_s_oacc_hll_inner_vect_sv.o \ + psb_s_oacc_hll_scals.o \ + psb_s_oacc_hll_scal.o psb_s_oacc_hll_allocate_mnnz.o \ + psb_s_oacc_hll_reallocate_nz.o psb_s_oacc_hll_cp_from_coo.o \ + psb_s_oacc_hll_cp_from_fmt.o psb_s_oacc_hll_mv_from_coo.o \ + psb_s_oacc_hll_mv_from_fmt.o psb_s_oacc_hll_mold.o: $(UP)/psb_s_oacc_hll_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o + + +psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ + psb_d_oacc_csr_scals.o \ psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \ - psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o + psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o + +psb_d_oacc_ell_vect_mv.o psb_d_oacc_ell_inner_vect_sv.o \ + psb_d_oacc_ell_scals.o \ + psb_d_oacc_ell_scal.o psb_d_oacc_ell_allocate_mnnz.o \ + psb_d_oacc_ell_reallocate_nz.o psb_d_oacc_ell_cp_from_coo.o \ + psb_d_oacc_ell_cp_from_fmt.o psb_d_oacc_ell_mv_from_coo.o \ + psb_d_oacc_ell_mv_from_fmt.o psb_d_oacc_ell_mold.o: $(UP)/psb_d_oacc_ell_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o + +psb_d_oacc_hll_vect_mv.o psb_d_oacc_hll_inner_vect_sv.o \ + psb_d_oacc_hll_scals.o \ + psb_d_oacc_hll_scal.o psb_d_oacc_hll_allocate_mnnz.o \ + psb_d_oacc_hll_reallocate_nz.o psb_d_oacc_hll_cp_from_coo.o \ + psb_d_oacc_hll_cp_from_fmt.o psb_d_oacc_hll_mv_from_coo.o \ + psb_d_oacc_hll_mv_from_fmt.o psb_d_oacc_hll_mold.o: $(UP)/psb_d_oacc_hll_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o + +psb_c_oacc_csr_vect_mv.o psb_c_oacc_csr_inner_vect_sv.o \ + psb_c_oacc_csr_scals.o \ + psb_c_oacc_csr_scal.o psb_c_oacc_csr_allocate_mnnz.o \ + psb_c_oacc_csr_reallocate_nz.o psb_c_oacc_csr_cp_from_coo.o \ + psb_c_oacc_csr_cp_from_fmt.o psb_c_oacc_csr_mv_from_coo.o \ + psb_c_oacc_csr_mv_from_fmt.o psb_c_oacc_csr_mold.o: $(UP)/psb_c_oacc_csr_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o + +psb_c_oacc_ell_vect_mv.o psb_c_oacc_ell_inner_vect_sv.o \ + psb_c_oacc_ell_scals.o \ + psb_c_oacc_ell_scal.o psb_c_oacc_ell_allocate_mnnz.o \ + psb_c_oacc_ell_reallocate_nz.o psb_c_oacc_ell_cp_from_coo.o \ + psb_c_oacc_ell_cp_from_fmt.o psb_c_oacc_ell_mv_from_coo.o \ + psb_c_oacc_ell_mv_from_fmt.o psb_c_oacc_ell_mold.o: $(UP)/psb_c_oacc_ell_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o + +psb_c_oacc_hll_vect_mv.o psb_c_oacc_hll_inner_vect_sv.o \ + psb_c_oacc_hll_scals.o \ + psb_c_oacc_hll_scal.o psb_c_oacc_hll_allocate_mnnz.o \ + psb_c_oacc_hll_reallocate_nz.o psb_c_oacc_hll_cp_from_coo.o \ + psb_c_oacc_hll_cp_from_fmt.o psb_c_oacc_hll_mv_from_coo.o \ + psb_c_oacc_hll_mv_from_fmt.o psb_c_oacc_hll_mold.o: $(UP)/psb_c_oacc_hll_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o + + +psb_z_oacc_csr_vect_mv.o psb_z_oacc_csr_inner_vect_sv.o \ + psb_z_oacc_csr_scals.o \ + psb_z_oacc_csr_scal.o psb_z_oacc_csr_allocate_mnnz.o \ + psb_z_oacc_csr_reallocate_nz.o psb_z_oacc_csr_cp_from_coo.o \ + psb_z_oacc_csr_cp_from_fmt.o psb_z_oacc_csr_mv_from_coo.o \ + psb_z_oacc_csr_mv_from_fmt.o psb_z_oacc_csr_mold.o: $(UP)/psb_z_oacc_csr_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o + +psb_z_oacc_ell_vect_mv.o psb_z_oacc_ell_inner_vect_sv.o \ + psb_z_oacc_ell_scals.o \ + psb_z_oacc_ell_scal.o psb_z_oacc_ell_allocate_mnnz.o \ + psb_z_oacc_ell_reallocate_nz.o psb_z_oacc_ell_cp_from_coo.o \ + psb_z_oacc_ell_cp_from_fmt.o psb_z_oacc_ell_mv_from_coo.o \ + psb_z_oacc_ell_mv_from_fmt.o psb_z_oacc_ell_mold.o: $(UP)/psb_z_oacc_ell_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o + +psb_z_oacc_hll_vect_mv.o psb_z_oacc_hll_inner_vect_sv.o \ + psb_z_oacc_hll_scals.o \ + psb_z_oacc_hll_scal.o psb_z_oacc_hll_allocate_mnnz.o \ + psb_z_oacc_hll_reallocate_nz.o psb_z_oacc_hll_cp_from_coo.o \ + psb_z_oacc_hll_cp_from_fmt.o psb_z_oacc_hll_mv_from_coo.o \ + psb_z_oacc_hll_mv_from_fmt.o psb_z_oacc_hll_mold.o: $(UP)/psb_z_oacc_hll_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o -#psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o +psb_s_oacc_mlt_v_2.o psb_s_oacc_mlt_v.o: $(UP)/psb_s_oacc_vect_mod.o +psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o +psb_c_oacc_mlt_v_2.o psb_c_oacc_mlt_v.o: $(UP)/psb_c_oacc_vect_mod.o +psb_z_oacc_mlt_v_2.o psb_z_oacc_mlt_v.o: $(UP)/psb_z_oacc_vect_mod.o clean: diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 index db56d9fc..3c6f6494 100644 --- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -21,14 +23,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_c_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 index f3e78d98..8113297b 100644 --- a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nzt, nc + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -18,19 +20,40 @@ contains nzt = a%nzt nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) - - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_c_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 index 68141e42..551b1a29 100644 --- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -23,14 +25,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_c_vect_oacc) + select type (yy => y) + class is (psb_c_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_c_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90 index b47cd1ae..ed0fc88e 100644 --- a/openacc/impl/psb_c_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90 @@ -9,10 +9,11 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ + logical :: conjgx_, conjgy_, device_done conjgx_ = .false. conjgy_ = .false. + device_done = .false. if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') @@ -27,31 +28,10 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if ((beta /= czero) .and. (z%is_host())) call z%sync() call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() - if ((beta /= czero) .and. (z%is_dev())) call z%sync() - !call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - end if - call z%set_host() + device_done = .true. end select - class default + end select + if (.not.device_done) then if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= czero) .and. (z%is_dev())) call z%sync() @@ -73,13 +53,14 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) end do end if call z%set_host() - end select + end if + contains subroutine c_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) implicit none integer(psb_ipk_), intent(in) :: n -complex(psb_spk_), intent(in) :: alpha, beta -complex(psb_spk_), intent(inout) :: x(:), y(:), z(:) + complex(psb_spk_), intent(in) :: alpha, beta + complex(psb_spk_), intent(inout) :: x(:), y(:), z(:) integer(psb_ipk_), intent(out) :: info logical, intent(in) :: conjgx, conjgy diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 index 0001cc76..596f2b17 100644 --- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -21,14 +23,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) + class is (psb_d_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_d_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 index 4239d049..ddd4bfc8 100644 --- a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nzt, nc + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -18,19 +20,40 @@ contains nzt = a%nzt nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) - - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) + class is (psb_d_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_d_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 index e7c47b7a..f971d61a 100644 --- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -23,14 +25,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_d_vect_oacc) + select type (yy => y) + class is (psb_d_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_d_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index ce460924..7850329a 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -9,10 +9,11 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ + logical :: conjgx_, conjgy_, device_done conjgx_ = .false. conjgy_ = .false. + device_done = .false. if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') @@ -27,31 +28,10 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if ((beta /= dzero) .and. (z%is_host())) call z%sync() call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync() - !call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - end if - call z%set_host() + device_done = .true. end select - class default + end select + if (.not.device_done) then if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= dzero) .and. (z%is_dev())) call z%sync() @@ -73,13 +53,14 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) end do end if call z%set_host() - end select + end if + contains subroutine d_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) implicit none integer(psb_ipk_), intent(in) :: n -real(psb_dpk_), intent(in) :: alpha, beta -real(psb_dpk_), intent(inout) :: x(:), y(:), z(:) + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(inout) :: x(:), y(:), z(:) integer(psb_ipk_), intent(out) :: info logical, intent(in) :: conjgx, conjgy diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 index 13ee1651..2799bd05 100644 --- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -21,14 +23,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_s_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 index bbbdd6a3..81166643 100644 --- a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nzt, nc + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -18,19 +20,40 @@ contains nzt = a%nzt nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) - - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_s_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 index efe9a9ca..e289f07c 100644 --- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -23,14 +25,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_s_vect_oacc) + select type (yy => y) + class is (psb_s_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_s_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90 index 2ce7fe53..b97f2f08 100644 --- a/openacc/impl/psb_s_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90 @@ -9,10 +9,11 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ + logical :: conjgx_, conjgy_, device_done conjgx_ = .false. conjgy_ = .false. + device_done = .false. if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') @@ -27,31 +28,10 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if ((beta /= szero) .and. (z%is_host())) call z%sync() call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() - if ((beta /= szero) .and. (z%is_dev())) call z%sync() - !call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - end if - call z%set_host() + device_done = .true. end select - class default + end select + if (.not.device_done) then if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= szero) .and. (z%is_dev())) call z%sync() @@ -73,13 +53,14 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) end do end if call z%set_host() - end select + end if + contains subroutine s_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) implicit none integer(psb_ipk_), intent(in) :: n -real(psb_spk_), intent(in) :: alpha, beta -real(psb_spk_), intent(inout) :: x(:), y(:), z(:) + real(psb_spk_), intent(in) :: alpha, beta + real(psb_spk_), intent(inout) :: x(:), y(:), z(:) integer(psb_ipk_), intent(out) :: info logical, intent(in) :: conjgx, conjgy diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 index cb34dce1..75cc693b 100644 --- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -21,14 +23,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_z_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 index ad8460b8..8d442c1d 100644 --- a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nzt, nc + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -18,19 +20,40 @@ contains nzt = a%nzt nc = size(a%ja,2) if ((n /= size(x%v)) .or. (m /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) - - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_z_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if + contains subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info) diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 index dbadf034..e373d6ff 100644 --- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -11,6 +11,8 @@ contains character, optional, intent(in) :: trans integer(psb_ipk_) :: m, n, nhacks, hksz + character :: trans_ + logical :: device_done, tra info = psb_success_ m = a%get_nrows() @@ -23,14 +25,35 @@ contains info = psb_err_invalid_mat_state_ return end if + device_done = .false. + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - if (a%is_host()) call a%sync() - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - - call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) - call y%set_dev() - + if (.not.tra) then + select type(xx => x) + class is (psb_z_vect_oacc) + select type (yy => y) + class is (psb_z_vect_oacc) + if (a%is_host()) call a%sync() + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + device_done = .true. + end select + end select + end if + + if (.not.device_done) then + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() + call a%psb_z_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans) + call y%set_host() + end if contains subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90 index f69d863c..c13b348c 100644 --- a/openacc/impl/psb_z_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90 @@ -9,10 +9,11 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ + logical :: conjgx_, conjgy_, device_done conjgx_ = .false. conjgy_ = .false. + device_done = .false. if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') @@ -27,31 +28,10 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) if ((beta /= zzero) .and. (z%is_host())) call z%sync() call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) call z%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() - if ((beta /= zzero) .and. (z%is_dev())) call z%sync() - !call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_) - if (conjgx_.and.conjgy_) then - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else if (conjgx_.and.(.not.conjgy_)) then - do i = 1, n - z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - else if ((.not.conjgx_).and.(conjgy_)) then - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i) - end do - else - do i = 1, n - z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i) - end do - end if - call z%set_host() + device_done = .true. end select - class default + end select + if (.not.device_done) then if (x%is_dev()) call x%sync() if (y%is_dev()) call y%sync() if ((beta /= zzero) .and. (z%is_dev())) call z%sync() @@ -73,13 +53,14 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) end do end if call z%set_host() - end select + end if + contains subroutine z_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy) implicit none integer(psb_ipk_), intent(in) :: n -complex(psb_dpk_), intent(in) :: alpha, beta -complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:) + complex(psb_dpk_), intent(in) :: alpha, beta + complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:) integer(psb_ipk_), intent(out) :: info logical, intent(in) :: conjgx, conjgy diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 index a1f6e2c3..59794335 100644 --- a/openacc/psb_c_oacc_csr_mat_mod.F90 +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -4,7 +4,6 @@ module psb_c_oacc_csr_mat_mod use openacc use psb_c_mat_mod use psb_c_oacc_vect_mod - !use oaccsparse_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 @@ -17,8 +16,6 @@ module psb_c_oacc_csr_mat_mod procedure, pass(a) :: sizeof => c_oacc_csr_sizeof procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv procedure, pass(a) :: scals => psb_c_oacc_csr_scals procedure, pass(a) :: scalv => psb_c_oacc_csr_scal procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz @@ -93,26 +90,6 @@ module psb_c_oacc_csr_mat_mod end subroutine psb_c_oacc_csr_inner_vect_sv end interface - interface - module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - class(psb_c_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_oacc_csr_csmm - end interface - - interface - module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - class(psb_c_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_oacc_csr_csmv - end interface - interface module subroutine psb_c_oacc_csr_scals(d, a, info) class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index 996d4628..f0559d26 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_c_oacc_ell_mat_mod procedure, pass(a) :: free => c_oacc_ell_free procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv procedure, pass(a) :: scals => psb_c_oacc_ell_scals procedure, pass(a) :: scalv => psb_c_oacc_ell_scal procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz @@ -92,26 +90,6 @@ module psb_c_oacc_ell_mat_mod end subroutine psb_c_oacc_ell_inner_vect_sv end interface - interface - module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - class(psb_c_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_oacc_ell_csmm - end interface - - interface - module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - class(psb_c_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_oacc_ell_csmv - end interface - interface module subroutine psb_c_oacc_ell_scals(d, a, info) class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 index ebcbf29e..93436224 100644 --- a/openacc/psb_c_oacc_hll_mat_mod.F90 +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_c_oacc_hll_mat_mod procedure, pass(a) :: free => c_oacc_hll_free procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv procedure, pass(a) :: scals => psb_c_oacc_hll_scals procedure, pass(a) :: scalv => psb_c_oacc_hll_scal procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz @@ -92,26 +90,6 @@ module psb_c_oacc_hll_mat_mod end subroutine psb_c_oacc_hll_inner_vect_sv end interface - interface - module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - class(psb_c_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_oacc_hll_csmm - end interface - - interface - module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - class(psb_c_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta, x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_c_oacc_hll_csmv - end interface - interface module subroutine psb_c_oacc_hll_scals(d, a, info) class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index a9d72eae..08b75575 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -4,7 +4,6 @@ module psb_d_oacc_csr_mat_mod use openacc use psb_d_mat_mod use psb_d_oacc_vect_mod - !use oaccsparse_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 @@ -17,8 +16,6 @@ module psb_d_oacc_csr_mat_mod procedure, pass(a) :: sizeof => d_oacc_csr_sizeof procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv procedure, pass(a) :: scals => psb_d_oacc_csr_scals procedure, pass(a) :: scalv => psb_d_oacc_csr_scal procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz @@ -93,26 +90,6 @@ module psb_d_oacc_csr_mat_mod end subroutine psb_d_oacc_csr_inner_vect_sv end interface - interface - module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_csmm - end interface - - interface - module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_csmv - end interface - interface module subroutine psb_d_oacc_csr_scals(d, a, info) class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 1409a1d6..3e25f576 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_d_oacc_ell_mat_mod procedure, pass(a) :: free => d_oacc_ell_free procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv procedure, pass(a) :: scals => psb_d_oacc_ell_scals procedure, pass(a) :: scalv => psb_d_oacc_ell_scal procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz @@ -92,26 +90,6 @@ module psb_d_oacc_ell_mat_mod end subroutine psb_d_oacc_ell_inner_vect_sv end interface - interface - module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_csmm - end interface - - interface - module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_ell_csmv - end interface - interface module subroutine psb_d_oacc_ell_scals(d, a, info) class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index 15ae055e..084987e5 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_d_oacc_hll_mat_mod procedure, pass(a) :: free => d_oacc_hll_free procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv procedure, pass(a) :: scals => psb_d_oacc_hll_scals procedure, pass(a) :: scalv => psb_d_oacc_hll_scal procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz @@ -92,26 +90,6 @@ module psb_d_oacc_hll_mat_mod end subroutine psb_d_oacc_hll_inner_vect_sv end interface - interface - module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_csmm - end interface - - interface - module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_hll_csmv - end interface - interface module subroutine psb_d_oacc_hll_scals(d, a, info) class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 index 64a5f9a1..16668d25 100644 --- a/openacc/psb_s_oacc_csr_mat_mod.F90 +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -4,7 +4,6 @@ module psb_s_oacc_csr_mat_mod use openacc use psb_s_mat_mod use psb_s_oacc_vect_mod - !use oaccsparse_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 @@ -17,8 +16,6 @@ module psb_s_oacc_csr_mat_mod procedure, pass(a) :: sizeof => s_oacc_csr_sizeof procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv procedure, pass(a) :: scals => psb_s_oacc_csr_scals procedure, pass(a) :: scalv => psb_s_oacc_csr_scal procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz @@ -93,26 +90,6 @@ module psb_s_oacc_csr_mat_mod end subroutine psb_s_oacc_csr_inner_vect_sv end interface - interface - module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - class(psb_s_oacc_csr_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_oacc_csr_csmm - end interface - - interface - module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - class(psb_s_oacc_csr_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_oacc_csr_csmv - end interface - interface module subroutine psb_s_oacc_csr_scals(d, a, info) class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index 2893c3e5..dcfc1850 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_s_oacc_ell_mat_mod procedure, pass(a) :: free => s_oacc_ell_free procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv procedure, pass(a) :: scals => psb_s_oacc_ell_scals procedure, pass(a) :: scalv => psb_s_oacc_ell_scal procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz @@ -92,26 +90,6 @@ module psb_s_oacc_ell_mat_mod end subroutine psb_s_oacc_ell_inner_vect_sv end interface - interface - module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - class(psb_s_oacc_ell_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_oacc_ell_csmm - end interface - - interface - module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - class(psb_s_oacc_ell_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_oacc_ell_csmv - end interface - interface module subroutine psb_s_oacc_ell_scals(d, a, info) class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 index 583a281e..0e7362f2 100644 --- a/openacc/psb_s_oacc_hll_mat_mod.F90 +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_s_oacc_hll_mat_mod procedure, pass(a) :: free => s_oacc_hll_free procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv procedure, pass(a) :: scals => psb_s_oacc_hll_scals procedure, pass(a) :: scalv => psb_s_oacc_hll_scal procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz @@ -92,26 +90,6 @@ module psb_s_oacc_hll_mat_mod end subroutine psb_s_oacc_hll_inner_vect_sv end interface - interface - module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - class(psb_s_oacc_hll_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_oacc_hll_csmm - end interface - - interface - module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - class(psb_s_oacc_hll_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta, x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_s_oacc_hll_csmv - end interface - interface module subroutine psb_s_oacc_hll_scals(d, a, info) class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 index 47973825..fdb59b4b 100644 --- a/openacc/psb_z_oacc_csr_mat_mod.F90 +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -4,7 +4,6 @@ module psb_z_oacc_csr_mat_mod use openacc use psb_z_mat_mod use psb_z_oacc_vect_mod - !use oaccsparse_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 @@ -17,8 +16,6 @@ module psb_z_oacc_csr_mat_mod procedure, pass(a) :: sizeof => z_oacc_csr_sizeof procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv procedure, pass(a) :: scals => psb_z_oacc_csr_scals procedure, pass(a) :: scalv => psb_z_oacc_csr_scal procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz @@ -93,26 +90,6 @@ module psb_z_oacc_csr_mat_mod end subroutine psb_z_oacc_csr_inner_vect_sv end interface - interface - module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - class(psb_z_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_oacc_csr_csmm - end interface - - interface - module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - class(psb_z_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_oacc_csr_csmv - end interface - interface module subroutine psb_z_oacc_csr_scals(d, a, info) class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index 4013ee9d..9d09d43d 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_z_oacc_ell_mat_mod procedure, pass(a) :: free => z_oacc_ell_free procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm - procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv procedure, pass(a) :: scals => psb_z_oacc_ell_scals procedure, pass(a) :: scalv => psb_z_oacc_ell_scal procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz @@ -92,26 +90,6 @@ module psb_z_oacc_ell_mat_mod end subroutine psb_z_oacc_ell_inner_vect_sv end interface - interface - module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - class(psb_z_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_oacc_ell_csmm - end interface - - interface - module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - class(psb_z_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_oacc_ell_csmv - end interface - interface module subroutine psb_z_oacc_ell_scals(d, a, info) class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 index 289b78ac..9eb7d08c 100644 --- a/openacc/psb_z_oacc_hll_mat_mod.F90 +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -26,8 +26,6 @@ module psb_z_oacc_hll_mat_mod procedure, pass(a) :: free => z_oacc_hll_free procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv - procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm - procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv procedure, pass(a) :: scals => psb_z_oacc_hll_scals procedure, pass(a) :: scalv => psb_z_oacc_hll_scal procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz @@ -92,26 +90,6 @@ module psb_z_oacc_hll_mat_mod end subroutine psb_z_oacc_hll_inner_vect_sv end interface - interface - module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - class(psb_z_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_oacc_hll_csmm - end interface - - interface - module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - class(psb_z_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta, x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_z_oacc_hll_csmv - end interface - interface module subroutine psb_z_oacc_hll_scals(d, a, info) class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a From 6236f3489cde833eb81232bf97b4cf7005e21183 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 30 Aug 2024 16:05:08 +0200 Subject: [PATCH 36/39] Remove obsolete files --- openacc/impl/psb_c_oacc_csr_csmm.F90 | 86 ---------------------------- openacc/impl/psb_c_oacc_csr_csmv.F90 | 81 -------------------------- openacc/impl/psb_c_oacc_ell_csmm.F90 | 86 ---------------------------- openacc/impl/psb_c_oacc_ell_csmv.F90 | 82 -------------------------- openacc/impl/psb_c_oacc_hll_csmm.F90 | 86 ---------------------------- openacc/impl/psb_c_oacc_hll_csmv.F90 | 84 --------------------------- openacc/impl/psb_d_oacc_csr_csmm.F90 | 86 ---------------------------- openacc/impl/psb_d_oacc_csr_csmv.F90 | 81 -------------------------- openacc/impl/psb_d_oacc_ell_csmm.F90 | 86 ---------------------------- openacc/impl/psb_d_oacc_ell_csmv.F90 | 82 -------------------------- openacc/impl/psb_d_oacc_hll_csmm.F90 | 86 ---------------------------- openacc/impl/psb_d_oacc_hll_csmv.F90 | 84 --------------------------- openacc/impl/psb_s_oacc_csr_csmm.F90 | 86 ---------------------------- openacc/impl/psb_s_oacc_csr_csmv.F90 | 81 -------------------------- openacc/impl/psb_s_oacc_ell_csmm.F90 | 86 ---------------------------- openacc/impl/psb_s_oacc_ell_csmv.F90 | 82 -------------------------- openacc/impl/psb_s_oacc_hll_csmm.F90 | 86 ---------------------------- openacc/impl/psb_s_oacc_hll_csmv.F90 | 84 --------------------------- openacc/impl/psb_z_oacc_csr_csmm.F90 | 86 ---------------------------- openacc/impl/psb_z_oacc_csr_csmv.F90 | 81 -------------------------- openacc/impl/psb_z_oacc_ell_csmm.F90 | 86 ---------------------------- openacc/impl/psb_z_oacc_ell_csmv.F90 | 82 -------------------------- openacc/impl/psb_z_oacc_hll_csmm.F90 | 86 ---------------------------- openacc/impl/psb_z_oacc_hll_csmv.F90 | 84 --------------------------- 24 files changed, 2020 deletions(-) delete mode 100644 openacc/impl/psb_c_oacc_csr_csmm.F90 delete mode 100644 openacc/impl/psb_c_oacc_csr_csmv.F90 delete mode 100644 openacc/impl/psb_c_oacc_ell_csmm.F90 delete mode 100644 openacc/impl/psb_c_oacc_ell_csmv.F90 delete mode 100644 openacc/impl/psb_c_oacc_hll_csmm.F90 delete mode 100644 openacc/impl/psb_c_oacc_hll_csmv.F90 delete mode 100644 openacc/impl/psb_d_oacc_csr_csmm.F90 delete mode 100644 openacc/impl/psb_d_oacc_csr_csmv.F90 delete mode 100644 openacc/impl/psb_d_oacc_ell_csmm.F90 delete mode 100644 openacc/impl/psb_d_oacc_ell_csmv.F90 delete mode 100644 openacc/impl/psb_d_oacc_hll_csmm.F90 delete mode 100644 openacc/impl/psb_d_oacc_hll_csmv.F90 delete mode 100644 openacc/impl/psb_s_oacc_csr_csmm.F90 delete mode 100644 openacc/impl/psb_s_oacc_csr_csmv.F90 delete mode 100644 openacc/impl/psb_s_oacc_ell_csmm.F90 delete mode 100644 openacc/impl/psb_s_oacc_ell_csmv.F90 delete mode 100644 openacc/impl/psb_s_oacc_hll_csmm.F90 delete mode 100644 openacc/impl/psb_s_oacc_hll_csmv.F90 delete mode 100644 openacc/impl/psb_z_oacc_csr_csmm.F90 delete mode 100644 openacc/impl/psb_z_oacc_csr_csmv.F90 delete mode 100644 openacc/impl/psb_z_oacc_ell_csmm.F90 delete mode 100644 openacc/impl/psb_z_oacc_ell_csmv.F90 delete mode 100644 openacc/impl/psb_z_oacc_hll_csmm.F90 delete mode 100644 openacc/impl/psb_z_oacc_hll_csmv.F90 diff --git a/openacc/impl/psb_c_oacc_csr_csmm.F90 b/openacc/impl/psb_c_oacc_csr_csmm.F90 deleted file mode 100644 index 5bf0bad4..00000000 --- a/openacc/impl/psb_c_oacc_csr_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmm_impl - use psb_base_mod -contains - module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_c_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - complex(psb_spk_), intent(in) :: x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n,k, nxy - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'c_oacc_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = a%irp(i), a%irp(i+1) - 1 - y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_c_oacc_csr_csmm -end submodule psb_c_oacc_csr_csmm_impl - diff --git a/openacc/impl/psb_c_oacc_csr_csmv.F90 b/openacc/impl/psb_c_oacc_csr_csmv.F90 deleted file mode 100644 index e5d5f24e..00000000 --- a/openacc/impl/psb_c_oacc_csr_csmv.F90 +++ /dev/null @@ -1,81 +0,0 @@ -submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmv_impl - use psb_base_mod -contains - module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_c_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - complex(psb_spk_), intent(in) :: x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'c_oacc_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, n - do j = a%irp(i), a%irp(i+1) - 1 - y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_c_oacc_csr_csmv -end submodule psb_c_oacc_csr_csmv_impl - diff --git a/openacc/impl/psb_c_oacc_ell_csmm.F90 b/openacc/impl/psb_c_oacc_ell_csmm.F90 deleted file mode 100644 index 01ed0d8b..00000000 --- a/openacc/impl/psb_c_oacc_ell_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_csmm_impl - use psb_base_mod -contains - module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_c_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - complex(psb_spk_), intent(in) :: x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'c_oacc_ell_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_c_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nzt = a%nzt - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = 1, nzt - y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_c_oacc_ell_csmm -end submodule psb_c_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_c_oacc_ell_csmv.F90 b/openacc/impl/psb_c_oacc_ell_csmv.F90 deleted file mode 100644 index dd5c1207..00000000 --- a/openacc/impl/psb_c_oacc_ell_csmv.F90 +++ /dev/null @@ -1,82 +0,0 @@ -submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_csmv_impl - use psb_base_mod -contains - module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_c_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - complex(psb_spk_), intent(in) :: x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'c_oacc_ell_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_c_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nzt = a%nzt - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, m - do j = 1, nzt - y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_c_oacc_ell_csmv -end submodule psb_c_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_c_oacc_hll_csmm.F90 b/openacc/impl/psb_c_oacc_hll_csmm.F90 deleted file mode 100644 index 6b0fc637..00000000 --- a/openacc/impl/psb_c_oacc_hll_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmm_impl - use psb_base_mod -contains - module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_c_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - complex(psb_spk_), intent(in) :: x(:,:) - complex(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'c_oacc_hll_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop present(a, x, y) - do j = 1, nxy - do k = 1, nhacks - do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 - y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_c_oacc_hll_csmm -end submodule psb_c_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_c_oacc_hll_csmv.F90 b/openacc/impl/psb_c_oacc_hll_csmv.F90 deleted file mode 100644 index f32e37b7..00000000 --- a/openacc/impl/psb_c_oacc_hll_csmv.F90 +++ /dev/null @@ -1,84 +0,0 @@ -submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmv_impl - use psb_base_mod -contains - module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_c_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_spk_), intent(in) :: alpha, beta - complex(psb_spk_), intent(in) :: x(:) - complex(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, hksz, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'c_oacc_hll_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - hksz = a%hksz - nhacks = (a%get_nrows() + hksz - 1) / hksz - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - ! This loop nest cannot be run with collapse, since - ! the inner loop extent varies. - !$acc parallel loop present(a, x, y) - do i = 1, nhacks - do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 - y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_c_oacc_hll_csmv -end submodule psb_c_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90 deleted file mode 100644 index 6ab87175..00000000 --- a/openacc/impl/psb_d_oacc_csr_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmm_impl - use psb_base_mod -contains - module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n,k, nxy - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'd_oacc_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = a%irp(i), a%irp(i+1) - 1 - y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_d_oacc_csr_csmm -end submodule psb_d_oacc_csr_csmm_impl - diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90 deleted file mode 100644 index 205a7a59..00000000 --- a/openacc/impl/psb_d_oacc_csr_csmv.F90 +++ /dev/null @@ -1,81 +0,0 @@ -submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmv_impl - use psb_base_mod -contains - module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'd_oacc_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, n - do j = a%irp(i), a%irp(i+1) - 1 - y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_d_oacc_csr_csmv -end submodule psb_d_oacc_csr_csmv_impl - diff --git a/openacc/impl/psb_d_oacc_ell_csmm.F90 b/openacc/impl/psb_d_oacc_ell_csmm.F90 deleted file mode 100644 index 6515a306..00000000 --- a/openacc/impl/psb_d_oacc_ell_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmm_impl - use psb_base_mod -contains - module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'd_oacc_ell_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nzt = a%nzt - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = 1, nzt - y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_d_oacc_ell_csmm -end submodule psb_d_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_d_oacc_ell_csmv.F90 b/openacc/impl/psb_d_oacc_ell_csmv.F90 deleted file mode 100644 index 8d5e2aaa..00000000 --- a/openacc/impl/psb_d_oacc_ell_csmv.F90 +++ /dev/null @@ -1,82 +0,0 @@ -submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmv_impl - use psb_base_mod -contains - module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_d_oacc_ell_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'd_oacc_ell_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nzt = a%nzt - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, m - do j = 1, nzt - y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_d_oacc_ell_csmv -end submodule psb_d_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_d_oacc_hll_csmm.F90 b/openacc/impl/psb_d_oacc_hll_csmm.F90 deleted file mode 100644 index 2dd6b53b..00000000 --- a/openacc/impl/psb_d_oacc_hll_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmm_impl - use psb_base_mod -contains - module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'd_oacc_hll_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop present(a, x, y) - do j = 1, nxy - do k = 1, nhacks - do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 - y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_d_oacc_hll_csmm -end submodule psb_d_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90 deleted file mode 100644 index d38fca61..00000000 --- a/openacc/impl/psb_d_oacc_hll_csmv.F90 +++ /dev/null @@ -1,84 +0,0 @@ -submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmv_impl - use psb_base_mod -contains - module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_d_oacc_hll_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, hksz, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'd_oacc_hll_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - hksz = a%hksz - nhacks = (a%get_nrows() + hksz - 1) / hksz - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - ! This loop nest cannot be run with collapse, since - ! the inner loop extent varies. - !$acc parallel loop present(a, x, y) - do i = 1, nhacks - do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 - y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_d_oacc_hll_csmv -end submodule psb_d_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_s_oacc_csr_csmm.F90 b/openacc/impl/psb_s_oacc_csr_csmm.F90 deleted file mode 100644 index bb8283bf..00000000 --- a/openacc/impl/psb_s_oacc_csr_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmm_impl - use psb_base_mod -contains - module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_s_oacc_csr_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - real(psb_spk_), intent(in) :: x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n,k, nxy - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 's_oacc_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = a%irp(i), a%irp(i+1) - 1 - y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_s_oacc_csr_csmm -end submodule psb_s_oacc_csr_csmm_impl - diff --git a/openacc/impl/psb_s_oacc_csr_csmv.F90 b/openacc/impl/psb_s_oacc_csr_csmv.F90 deleted file mode 100644 index c224dc0e..00000000 --- a/openacc/impl/psb_s_oacc_csr_csmv.F90 +++ /dev/null @@ -1,81 +0,0 @@ -submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmv_impl - use psb_base_mod -contains - module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_s_oacc_csr_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - real(psb_spk_), intent(in) :: x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 's_oacc_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, n - do j = a%irp(i), a%irp(i+1) - 1 - y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_s_oacc_csr_csmv -end submodule psb_s_oacc_csr_csmv_impl - diff --git a/openacc/impl/psb_s_oacc_ell_csmm.F90 b/openacc/impl/psb_s_oacc_ell_csmm.F90 deleted file mode 100644 index 63219384..00000000 --- a/openacc/impl/psb_s_oacc_ell_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmm_impl - use psb_base_mod -contains - module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_s_oacc_ell_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - real(psb_spk_), intent(in) :: x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 's_oacc_ell_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nzt = a%nzt - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = 1, nzt - y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_s_oacc_ell_csmm -end submodule psb_s_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_s_oacc_ell_csmv.F90 b/openacc/impl/psb_s_oacc_ell_csmv.F90 deleted file mode 100644 index d4aaa9d4..00000000 --- a/openacc/impl/psb_s_oacc_ell_csmv.F90 +++ /dev/null @@ -1,82 +0,0 @@ -submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmv_impl - use psb_base_mod -contains - module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_s_oacc_ell_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - real(psb_spk_), intent(in) :: x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 's_oacc_ell_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nzt = a%nzt - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, m - do j = 1, nzt - y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_s_oacc_ell_csmv -end submodule psb_s_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_s_oacc_hll_csmm.F90 b/openacc/impl/psb_s_oacc_hll_csmm.F90 deleted file mode 100644 index 803071eb..00000000 --- a/openacc/impl/psb_s_oacc_hll_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmm_impl - use psb_base_mod -contains - module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_s_oacc_hll_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - real(psb_spk_), intent(in) :: x(:,:) - real(psb_spk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 's_oacc_hll_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop present(a, x, y) - do j = 1, nxy - do k = 1, nhacks - do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 - y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_s_oacc_hll_csmm -end submodule psb_s_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_s_oacc_hll_csmv.F90 b/openacc/impl/psb_s_oacc_hll_csmv.F90 deleted file mode 100644 index b3c0cae8..00000000 --- a/openacc/impl/psb_s_oacc_hll_csmv.F90 +++ /dev/null @@ -1,84 +0,0 @@ -submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmv_impl - use psb_base_mod -contains - module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_s_oacc_hll_sparse_mat), intent(in) :: a - real(psb_spk_), intent(in) :: alpha, beta - real(psb_spk_), intent(in) :: x(:) - real(psb_spk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, hksz, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 's_oacc_hll_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - hksz = a%hksz - nhacks = (a%get_nrows() + hksz - 1) / hksz - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - ! This loop nest cannot be run with collapse, since - ! the inner loop extent varies. - !$acc parallel loop present(a, x, y) - do i = 1, nhacks - do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 - y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_s_oacc_hll_csmv -end submodule psb_s_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_z_oacc_csr_csmm.F90 b/openacc/impl/psb_z_oacc_csr_csmm.F90 deleted file mode 100644 index 97a38deb..00000000 --- a/openacc/impl/psb_z_oacc_csr_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmm_impl - use psb_base_mod -contains - module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_z_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - complex(psb_dpk_), intent(in) :: x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n,k, nxy - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'z_oacc_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = a%irp(i), a%irp(i+1) - 1 - y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_z_oacc_csr_csmm -end submodule psb_z_oacc_csr_csmm_impl - diff --git a/openacc/impl/psb_z_oacc_csr_csmv.F90 b/openacc/impl/psb_z_oacc_csr_csmv.F90 deleted file mode 100644 index 8def3c76..00000000 --- a/openacc/impl/psb_z_oacc_csr_csmv.F90 +++ /dev/null @@ -1,81 +0,0 @@ -submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmv_impl - use psb_base_mod -contains - module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_z_oacc_csr_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - complex(psb_dpk_), intent(in) :: x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'z_oacc_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, n - do j = a%irp(i), a%irp(i+1) - 1 - y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_z_oacc_csr_csmv -end submodule psb_z_oacc_csr_csmv_impl - diff --git a/openacc/impl/psb_z_oacc_ell_csmm.F90 b/openacc/impl/psb_z_oacc_ell_csmm.F90 deleted file mode 100644 index 406ca8c5..00000000 --- a/openacc/impl/psb_z_oacc_ell_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmm_impl - use psb_base_mod -contains - module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_z_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - complex(psb_dpk_), intent(in) :: x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'z_oacc_ell_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nzt = a%nzt - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, n - do k = 1, nzt - y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_z_oacc_ell_csmm -end submodule psb_z_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_z_oacc_ell_csmv.F90 b/openacc/impl/psb_z_oacc_ell_csmv.F90 deleted file mode 100644 index 502dd4f1..00000000 --- a/openacc/impl/psb_z_oacc_ell_csmv.F90 +++ /dev/null @@ -1,82 +0,0 @@ -submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmv_impl - use psb_base_mod -contains - module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_z_oacc_ell_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - complex(psb_dpk_), intent(in) :: x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, nzt - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'z_oacc_ell_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nzt = a%nzt - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - - !$acc parallel loop present(a, x, y) - do i = 1, m - do j = 1, nzt - y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_z_oacc_ell_csmv -end submodule psb_z_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_z_oacc_hll_csmm.F90 b/openacc/impl/psb_z_oacc_hll_csmm.F90 deleted file mode 100644 index 3cfe5b32..00000000 --- a/openacc/impl/psb_z_oacc_hll_csmm.F90 +++ /dev/null @@ -1,86 +0,0 @@ -submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmm_impl - use psb_base_mod -contains - module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_z_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - complex(psb_dpk_), intent(in) :: x(:,:) - complex(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'z_oacc_hll_csmm' - logical, parameter :: debug = .false. - - info = psb_success_ - call psb_erractionsave(err_act) - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - nxy = min(size(x,2), size(y,2)) - nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz - - !$acc parallel loop collapse(2) present(a, x, y) - do j = 1, nxy - do i = 1, m - y(i,j) = beta * y(i,j) - end do - end do - - !$acc parallel loop present(a, x, y) - do j = 1, nxy - do k = 1, nhacks - do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 - y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) - end do - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_z_oacc_hll_csmm -end submodule psb_z_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_z_oacc_hll_csmv.F90 b/openacc/impl/psb_z_oacc_hll_csmv.F90 deleted file mode 100644 index 923bc061..00000000 --- a/openacc/impl/psb_z_oacc_hll_csmv.F90 +++ /dev/null @@ -1,84 +0,0 @@ -submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmv_impl - use psb_base_mod -contains - module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) - implicit none - class(psb_z_oacc_hll_sparse_mat), intent(in) :: a - complex(psb_dpk_), intent(in) :: alpha, beta - complex(psb_dpk_), intent(in) :: x(:) - complex(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - - character :: trans_ - integer(psb_ipk_) :: i, j, m, n, hksz, nhacks - logical :: tra - integer(psb_ipk_) :: err_act - character(len=20) :: name = 'z_oacc_hll_csmv' - logical, parameter :: debug = .false. - - call psb_erractionsave(err_act) - info = psb_success_ - - if (present(trans)) then - trans_ = trans - else - trans_ = 'N' - end if - - if (.not.a%is_asb()) then - info = psb_err_invalid_mat_state_ - call psb_errpush(info, name) - goto 9999 - endif - - tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - - if (tra) then - m = a%get_ncols() - n = a%get_nrows() - else - n = a%get_ncols() - m = a%get_nrows() - end if - - if (size(x,1) < n) then - info = 36 - call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) - goto 9999 - end if - - if (size(y,1) < m) then - info = 36 - call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) - goto 9999 - end if - - if (tra) then - call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) - else - hksz = a%hksz - nhacks = (a%get_nrows() + hksz - 1) / hksz - - !$acc parallel loop present(a, x, y) - do i = 1, m - y(i) = beta * y(i) - end do - ! This loop nest cannot be run with collapse, since - ! the inner loop extent varies. - !$acc parallel loop present(a, x, y) - do i = 1, nhacks - do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 - y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) - end do - end do - endif - - call psb_erractionrestore(err_act) - return - -9999 call psb_error_handler(err_act) - return - - end subroutine psb_z_oacc_hll_csmv -end submodule psb_z_oacc_hll_csmv_impl From bcbe0c89c758086725433b91bdaed38a7868db32 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 10 Sep 2024 17:37:35 +0200 Subject: [PATCH 37/39] Backporting fixes from version 4 --- openacc/Makefile | 7 +- openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 2 +- openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 2 +- openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 2 +- openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 2 +- openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 2 +- openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 2 +- openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 2 +- openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 2 +- openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 2 +- openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 2 +- openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 2 +- openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 2 +- openacc/psb_c_oacc_vect_mod.F90 | 231 ++++++++++++++++++------ openacc/psb_d_oacc_vect_mod.F90 | 231 ++++++++++++++++++------ openacc/psb_i_oacc_vect_mod.F90 | 213 ++++++++++++++++------ openacc/psb_l_oacc_vect_mod.F90 | 213 ++++++++++++++++------ openacc/psb_oacc_env_mod.F90 | 33 ++-- openacc/psb_s_oacc_vect_mod.F90 | 231 ++++++++++++++++++------ openacc/psb_z_oacc_vect_mod.F90 | 231 ++++++++++++++++++------ 20 files changed, 1048 insertions(+), 366 deletions(-) diff --git a/openacc/Makefile b/openacc/Makefile index cdcc9f71..f60a810d 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -58,9 +58,10 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \ psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \ psb_oacc_env_mod.o -psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o\ - psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o -psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o +psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ + psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_oacc_env_mod.o +psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_oacc_env_mod.o +psb_i_oacc_vect_mod.o: psb_oacc_env_mod.o psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 index 3c6f6494..c1030094 100644 --- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 index 8113297b..7a39c031 100644 --- a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90 @@ -19,7 +19,7 @@ contains n = a%get_ncols() nzt = a%nzt nc = size(a%ja,2) - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 index 551b1a29..494ed149 100644 --- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90 @@ -20,7 +20,7 @@ contains nhacks = size(a%hkoffs) - 1 hksz = a%hksz - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 index 596f2b17..a2efdc3e 100644 --- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 index ddd4bfc8..b233669d 100644 --- a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 @@ -19,7 +19,7 @@ contains n = a%get_ncols() nzt = a%nzt nc = size(a%ja,2) - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 index f971d61a..150ade8e 100644 --- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 @@ -20,7 +20,7 @@ contains nhacks = size(a%hkoffs) - 1 hksz = a%hksz - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 index 2799bd05..5d3cc30c 100644 --- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 index 81166643..76b1fe5b 100644 --- a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90 @@ -19,7 +19,7 @@ contains n = a%get_ncols() nzt = a%nzt nc = size(a%ja,2) - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 index e289f07c..e1d42252 100644 --- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90 @@ -20,7 +20,7 @@ contains nhacks = size(a%hkoffs) - 1 hksz = a%hksz - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 index 75cc693b..b312b6b7 100644 --- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90 @@ -18,7 +18,7 @@ contains m = a%get_nrows() n = a%get_ncols() - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 index 8d442c1d..53283689 100644 --- a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90 @@ -19,7 +19,7 @@ contains n = a%get_ncols() nzt = a%nzt nc = size(a%ja,2) - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 index e373d6ff..350592bc 100644 --- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 +++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90 @@ -20,7 +20,7 @@ contains nhacks = size(a%hkoffs) - 1 hksz = a%hksz - if ((n /= size(x%v)) .or. (m /= size(y%v))) then + if ((n > size(x%v)) .or. (m > size(y%v))) then write(0,*) 'Size error ', m, n, size(x%v), size(y%v) info = psb_err_invalid_mat_state_ return diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 95c45646..4e2cca9e 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -3,6 +3,8 @@ module psb_c_oacc_vect_mod use openacc use psb_const_mod use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod use psb_c_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod @@ -26,6 +28,8 @@ module psb_c_oacc_vect_mod procedure, pass(x) :: bld_x => c_oacc_bld_x procedure, pass(x) :: bld_mn => c_oacc_bld_mn procedure, pass(x) :: free => c_oacc_vect_free + procedure, pass(x) :: free_buffer => c_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => c_oacc_vect_maybe_free_buffer procedure, pass(x) :: ins_a => c_oacc_ins_a procedure, pass(x) :: ins_v => c_oacc_ins_v procedure, pass(x) :: is_host => c_oacc_is_host @@ -36,11 +40,13 @@ module psb_c_oacc_vect_mod procedure, pass(x) :: set_sync => c_oacc_set_sync procedure, pass(x) :: set_scal => c_oacc_set_scal + procedure, pass(x) :: new_buffer => c_oacc_new_buffer procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf + procedure, pass(x) :: gthzbuf => c_oacc_gthzbuf procedure, pass(y) :: sctb => c_oacc_sctb procedure, pass(y) :: sctb_x => c_oacc_sctb_x procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf + procedure, nopass :: device_wait => c_oacc_device_wait procedure, pass(x) :: get_size => c_oacc_get_size @@ -87,6 +93,11 @@ module psb_c_oacc_vect_mod contains + subroutine c_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine c_oacc_device_wait + subroutine c_oacc_absval1(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -181,13 +192,17 @@ contains !$acc parallel loop reduction(max:mx) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) - end do - sum = szero - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x(i)/mx)**2 end do - res = mx*sqrt(sum) + if (mx == szero) then + res = mx + else + sum = szero + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if end function c_inner_oacc_nrm2 end function c_oacc_nrm2 @@ -398,29 +413,44 @@ contains class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta class(psb_c_vect_oacc) :: y - integer(psb_ipk_) :: info - + integer(psb_ipk_) :: info, k + logical :: acc_done if (.not.allocated(y%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() + !$acc update device(y%combuf) + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. + end select - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) end do + end if - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) end do - end select + !$acc end parallel loop + end subroutine inner_sctb + end subroutine c_oacc_sctb_buf subroutine c_oacc_sctb_x(i, n, idx, x, beta, y) @@ -430,24 +460,41 @@ contains class(psb_i_base_vect_type) :: idx complex(psb_spk_) :: beta, x(:) class(psb_c_vect_oacc) :: y - integer(psb_ipk_) :: info, ni + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'c_oacc_sctb_x') - return + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. + call y%set_dev() + end if end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if - if (y%is_host()) call y%sync() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + end subroutine c_oacc_sctb_x subroutine c_oacc_sctb(n, idx, x, beta, y) @@ -463,7 +510,6 @@ contains if (n == 0) return if (y%is_dev()) call y%sync() - !$acc parallel loop do i = 1, n y%v(idx(i)) = beta * y%v(idx(i)) + x(i) end do @@ -477,30 +523,48 @@ contains integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx class(psb_c_vect_oacc) :: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info,k + logical :: acc_done info = 0 + acc_done = .false. + if (.not.allocated(x%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if - select type(ii => idx) + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'c_oacc_gthzbuf') - return + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. end select - if (x%is_host()) call x%sync() + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine c_oacc_gthzbuf - + subroutine c_oacc_gthzv_x(i, n, idx, x, y) use psb_base_mod implicit none @@ -508,24 +572,41 @@ contains class(psb_i_base_vect_type):: idx complex(psb_spk_) :: y(:) class(psb_c_vect_oacc):: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info, k + logical :: acc_done info = 0 - - select type(ii => idx) + acc_done = .false. + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'c_oacc_gthzv_x') - return + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if end select - - if (x%is_host()) call x%sync() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine c_oacc_gthzv_x subroutine c_oacc_ins_v(n, irl, val, dupl, x, info) @@ -718,7 +799,7 @@ contains integer(psb_ipk_) :: info res = czero - !write(0,*) 'dot_v' +!!$ write(0,*) 'oacc_dot_v' select type(yy => y) type is (psb_c_base_vect_type) if (x%is_dev()) call x%sync() @@ -762,6 +843,17 @@ contains end function c_oacc_dot_a + subroutine c_oacc_new_buffer(n,x,info) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + if (n /= psb_size(x%combuf)) then + call x%psb_c_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + end if + end subroutine c_oacc_new_buffer + subroutine c_oacc_sync_dev_space(x) implicit none class(psb_c_vect_oacc), intent(inout) :: x @@ -860,12 +952,33 @@ contains class(psb_c_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 - if (allocated(x%v)) then - if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - deallocate(x%v, stat=info) - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_c_base_vect_type%free(info) end subroutine c_oacc_vect_free + + subroutine c_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer())& + & call x%free_buffer(info) + end subroutine c_oacc_vect_maybe_free_buffer + + subroutine c_oacc_vect_free_buffer(x,info) + implicit none + class(psb_c_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_c_base_vect_type%free_buffer(info) + + end subroutine c_oacc_vect_free_buffer + function c_oacc_get_size(x) result(res) implicit none class(psb_c_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 3d71e54c..80ac35f7 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -3,6 +3,8 @@ module psb_d_oacc_vect_mod use openacc use psb_const_mod use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod use psb_d_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod @@ -26,6 +28,8 @@ module psb_d_oacc_vect_mod procedure, pass(x) :: bld_x => d_oacc_bld_x procedure, pass(x) :: bld_mn => d_oacc_bld_mn procedure, pass(x) :: free => d_oacc_vect_free + procedure, pass(x) :: free_buffer => d_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => d_oacc_vect_maybe_free_buffer procedure, pass(x) :: ins_a => d_oacc_ins_a procedure, pass(x) :: ins_v => d_oacc_ins_v procedure, pass(x) :: is_host => d_oacc_is_host @@ -36,11 +40,13 @@ module psb_d_oacc_vect_mod procedure, pass(x) :: set_sync => d_oacc_set_sync procedure, pass(x) :: set_scal => d_oacc_set_scal + procedure, pass(x) :: new_buffer => d_oacc_new_buffer procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf + procedure, pass(x) :: gthzbuf => d_oacc_gthzbuf procedure, pass(y) :: sctb => d_oacc_sctb procedure, pass(y) :: sctb_x => d_oacc_sctb_x procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf + procedure, nopass :: device_wait => d_oacc_device_wait procedure, pass(x) :: get_size => d_oacc_get_size @@ -87,6 +93,11 @@ module psb_d_oacc_vect_mod contains + subroutine d_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine d_oacc_device_wait + subroutine d_oacc_absval1(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -181,13 +192,17 @@ contains !$acc parallel loop reduction(max:mx) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) - end do - sum = dzero - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x(i)/mx)**2 end do - res = mx*sqrt(sum) + if (mx == dzero) then + res = mx + else + sum = dzero + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if end function d_inner_oacc_nrm2 end function d_oacc_nrm2 @@ -398,29 +413,44 @@ contains class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info - + integer(psb_ipk_) :: info, k + logical :: acc_done if (.not.allocated(y%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() + !$acc update device(y%combuf) + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. + end select - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) end do + end if - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) end do - end select + !$acc end parallel loop + end subroutine inner_sctb + end subroutine d_oacc_sctb_buf subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) @@ -430,24 +460,41 @@ contains class(psb_i_base_vect_type) :: idx real(psb_dpk_) :: beta, x(:) class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info, ni + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'd_oacc_sctb_x') - return + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. + call y%set_dev() + end if end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if - if (y%is_host()) call y%sync() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + end subroutine d_oacc_sctb_x subroutine d_oacc_sctb(n, idx, x, beta, y) @@ -463,7 +510,6 @@ contains if (n == 0) return if (y%is_dev()) call y%sync() - !$acc parallel loop do i = 1, n y%v(idx(i)) = beta * y%v(idx(i)) + x(i) end do @@ -477,30 +523,48 @@ contains integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx class(psb_d_vect_oacc) :: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info,k + logical :: acc_done info = 0 + acc_done = .false. + if (.not.allocated(x%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if - select type(ii => idx) + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'd_oacc_gthzbuf') - return + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. end select - if (x%is_host()) call x%sync() + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine d_oacc_gthzbuf - + subroutine d_oacc_gthzv_x(i, n, idx, x, y) use psb_base_mod implicit none @@ -508,24 +572,41 @@ contains class(psb_i_base_vect_type):: idx real(psb_dpk_) :: y(:) class(psb_d_vect_oacc):: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info, k + logical :: acc_done info = 0 - - select type(ii => idx) + acc_done = .false. + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'd_oacc_gthzv_x') - return + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if end select - - if (x%is_host()) call x%sync() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine d_oacc_gthzv_x subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) @@ -718,7 +799,7 @@ contains integer(psb_ipk_) :: info res = dzero - !write(0,*) 'dot_v' +!!$ write(0,*) 'oacc_dot_v' select type(yy => y) type is (psb_d_base_vect_type) if (x%is_dev()) call x%sync() @@ -762,6 +843,17 @@ contains end function d_oacc_dot_a + subroutine d_oacc_new_buffer(n,x,info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + if (n /= psb_size(x%combuf)) then + call x%psb_d_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + end if + end subroutine d_oacc_new_buffer + subroutine d_oacc_sync_dev_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -860,12 +952,33 @@ contains class(psb_d_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 - if (allocated(x%v)) then - if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - deallocate(x%v, stat=info) - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_d_base_vect_type%free(info) end subroutine d_oacc_vect_free + + subroutine d_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer())& + & call x%free_buffer(info) + end subroutine d_oacc_vect_maybe_free_buffer + + subroutine d_oacc_vect_free_buffer(x,info) + implicit none + class(psb_d_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_d_base_vect_type%free_buffer(info) + + end subroutine d_oacc_vect_free_buffer + function d_oacc_get_size(x) result(res) implicit none class(psb_d_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index 42226f0c..cfd0c210 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -3,6 +3,8 @@ module psb_i_oacc_vect_mod use openacc use psb_const_mod use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod use psb_i_vect_mod integer(psb_ipk_), parameter, private :: is_host = -1 @@ -24,6 +26,8 @@ module psb_i_oacc_vect_mod procedure, pass(x) :: bld_x => i_oacc_bld_x procedure, pass(x) :: bld_mn => i_oacc_bld_mn procedure, pass(x) :: free => i_oacc_vect_free + procedure, pass(x) :: free_buffer => i_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => i_oacc_vect_maybe_free_buffer procedure, pass(x) :: ins_a => i_oacc_ins_a procedure, pass(x) :: ins_v => i_oacc_ins_v procedure, pass(x) :: is_host => i_oacc_is_host @@ -34,11 +38,13 @@ module psb_i_oacc_vect_mod procedure, pass(x) :: set_sync => i_oacc_set_sync procedure, pass(x) :: set_scal => i_oacc_set_scal + procedure, pass(x) :: new_buffer => i_oacc_new_buffer procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf + procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf procedure, pass(y) :: sctb => i_oacc_sctb procedure, pass(y) :: sctb_x => i_oacc_sctb_x procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf + procedure, nopass :: device_wait => i_oacc_device_wait procedure, pass(x) :: get_size => i_oacc_get_size @@ -48,6 +54,11 @@ module psb_i_oacc_vect_mod contains + subroutine i_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine i_oacc_device_wait + subroutine i_oacc_sctb_buf(i, n, idx, beta, y) use psb_base_mod @@ -56,29 +67,44 @@ contains class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta class(psb_i_vect_oacc) :: y - integer(psb_ipk_) :: info - + integer(psb_ipk_) :: info, k + logical :: acc_done if (.not.allocated(y%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() + !$acc update device(y%combuf) + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. + end select - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) end do + end if - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) end do - end select + !$acc end parallel loop + end subroutine inner_sctb + end subroutine i_oacc_sctb_buf subroutine i_oacc_sctb_x(i, n, idx, x, beta, y) @@ -88,24 +114,41 @@ contains class(psb_i_base_vect_type) :: idx integer(psb_ipk_) :: beta, x(:) class(psb_i_vect_oacc) :: y - integer(psb_ipk_) :: info, ni + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'i_oacc_sctb_x') - return + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. + call y%set_dev() + end if end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if - if (y%is_host()) call y%sync() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + end subroutine i_oacc_sctb_x subroutine i_oacc_sctb(n, idx, x, beta, y) @@ -121,7 +164,6 @@ contains if (n == 0) return if (y%is_dev()) call y%sync() - !$acc parallel loop do i = 1, n y%v(idx(i)) = beta * y%v(idx(i)) + x(i) end do @@ -135,30 +177,48 @@ contains integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx class(psb_i_vect_oacc) :: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info,k + logical :: acc_done info = 0 + acc_done = .false. + if (.not.allocated(x%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if - select type(ii => idx) + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'i_oacc_gthzbuf') - return + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. end select - if (x%is_host()) call x%sync() + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine i_oacc_gthzbuf - + subroutine i_oacc_gthzv_x(i, n, idx, x, y) use psb_base_mod implicit none @@ -166,24 +226,41 @@ contains class(psb_i_base_vect_type):: idx integer(psb_ipk_) :: y(:) class(psb_i_vect_oacc):: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info, k + logical :: acc_done info = 0 - - select type(ii => idx) + acc_done = .false. + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'i_oacc_gthzv_x') - return + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if end select - - if (x%is_host()) call x%sync() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_ipk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine i_oacc_gthzv_x subroutine i_oacc_ins_v(n, irl, val, dupl, x, info) @@ -366,6 +443,17 @@ contains end function i_oacc_get_fmt + subroutine i_oacc_new_buffer(n,x,info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + if (n /= psb_size(x%combuf)) then + call x%psb_i_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + end if + end subroutine i_oacc_new_buffer + subroutine i_oacc_sync_dev_space(x) implicit none class(psb_i_vect_oacc), intent(inout) :: x @@ -464,12 +552,33 @@ contains class(psb_i_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 - if (allocated(x%v)) then - if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - deallocate(x%v, stat=info) - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_i_base_vect_type%free(info) end subroutine i_oacc_vect_free + + subroutine i_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer())& + & call x%free_buffer(info) + + end subroutine i_oacc_vect_maybe_free_buffer + + subroutine i_oacc_vect_free_buffer(x,info) + implicit none + class(psb_i_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_i_base_vect_type%free_buffer(info) + end subroutine i_oacc_vect_free_buffer + function i_oacc_get_size(x) result(res) implicit none class(psb_i_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index eb9b2b9a..5526796f 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -3,6 +3,8 @@ module psb_l_oacc_vect_mod use openacc use psb_const_mod use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod use psb_l_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod @@ -26,6 +28,8 @@ module psb_l_oacc_vect_mod procedure, pass(x) :: bld_x => l_oacc_bld_x procedure, pass(x) :: bld_mn => l_oacc_bld_mn procedure, pass(x) :: free => l_oacc_vect_free + procedure, pass(x) :: free_buffer => l_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => l_oacc_vect_maybe_free_buffer procedure, pass(x) :: ins_a => l_oacc_ins_a procedure, pass(x) :: ins_v => l_oacc_ins_v procedure, pass(x) :: is_host => l_oacc_is_host @@ -36,11 +40,13 @@ module psb_l_oacc_vect_mod procedure, pass(x) :: set_sync => l_oacc_set_sync procedure, pass(x) :: set_scal => l_oacc_set_scal + procedure, pass(x) :: new_buffer => l_oacc_new_buffer procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf + procedure, pass(x) :: gthzbuf => l_oacc_gthzbuf procedure, pass(y) :: sctb => l_oacc_sctb procedure, pass(y) :: sctb_x => l_oacc_sctb_x procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf + procedure, nopass :: device_wait => l_oacc_device_wait procedure, pass(x) :: get_size => l_oacc_get_size @@ -50,6 +56,11 @@ module psb_l_oacc_vect_mod contains + subroutine l_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine l_oacc_device_wait + subroutine l_oacc_sctb_buf(i, n, idx, beta, y) use psb_base_mod @@ -58,29 +69,44 @@ contains class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: beta class(psb_l_vect_oacc) :: y - integer(psb_ipk_) :: info - + integer(psb_ipk_) :: info, k + logical :: acc_done if (.not.allocated(y%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() + !$acc update device(y%combuf) + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. + end select - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) end do + end if - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_lpk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) end do - end select + !$acc end parallel loop + end subroutine inner_sctb + end subroutine l_oacc_sctb_buf subroutine l_oacc_sctb_x(i, n, idx, x, beta, y) @@ -90,24 +116,41 @@ contains class(psb_i_base_vect_type) :: idx integer(psb_lpk_) :: beta, x(:) class(psb_l_vect_oacc) :: y - integer(psb_ipk_) :: info, ni + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'l_oacc_sctb_x') - return + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. + call y%set_dev() + end if end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if - if (y%is_host()) call y%sync() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_lpk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + end subroutine l_oacc_sctb_x subroutine l_oacc_sctb(n, idx, x, beta, y) @@ -123,7 +166,6 @@ contains if (n == 0) return if (y%is_dev()) call y%sync() - !$acc parallel loop do i = 1, n y%v(idx(i)) = beta * y%v(idx(i)) + x(i) end do @@ -137,30 +179,48 @@ contains integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx class(psb_l_vect_oacc) :: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info,k + logical :: acc_done info = 0 + acc_done = .false. + if (.not.allocated(x%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if - select type(ii => idx) + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'l_oacc_gthzbuf') - return + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. end select - if (x%is_host()) call x%sync() + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_lpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine l_oacc_gthzbuf - + subroutine l_oacc_gthzv_x(i, n, idx, x, y) use psb_base_mod implicit none @@ -168,24 +228,41 @@ contains class(psb_i_base_vect_type):: idx integer(psb_lpk_) :: y(:) class(psb_l_vect_oacc):: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info, k + logical :: acc_done info = 0 - - select type(ii => idx) + acc_done = .false. + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'l_oacc_gthzv_x') - return + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if end select - - if (x%is_host()) call x%sync() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + integer(psb_lpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine l_oacc_gthzv_x subroutine l_oacc_ins_v(n, irl, val, dupl, x, info) @@ -368,6 +445,17 @@ contains end function l_oacc_get_fmt + subroutine l_oacc_new_buffer(n,x,info) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + if (n /= psb_size(x%combuf)) then + call x%psb_l_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + end if + end subroutine l_oacc_new_buffer + subroutine l_oacc_sync_dev_space(x) implicit none class(psb_l_vect_oacc), intent(inout) :: x @@ -466,12 +554,33 @@ contains class(psb_l_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 - if (allocated(x%v)) then - if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - deallocate(x%v, stat=info) - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_l_base_vect_type%free(info) end subroutine l_oacc_vect_free + + subroutine l_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer())& + & call x%free_buffer(info) + + end subroutine l_oacc_vect_maybe_free_buffer + + subroutine l_oacc_vect_free_buffer(x,info) + implicit none + class(psb_l_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_l_base_vect_type%free_buffer(info) + end subroutine l_oacc_vect_free_buffer + function l_oacc_get_size(x) result(res) implicit none class(psb_l_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90 index 83c9426d..dc01ad3a 100644 --- a/openacc/psb_oacc_env_mod.F90 +++ b/openacc/psb_oacc_env_mod.F90 @@ -1,18 +1,29 @@ module psb_oacc_env_mod -contains + use psb_penv_mod + use psb_const_mod + use psb_error_mod + logical, private :: oacc_do_maybe_free_buffer = .false. - subroutine psb_oacc_init(ctxt, dev) - use psb_penv_mod - use psb_const_mod - use psb_error_mod - type(psb_ctxt_type), intent(in) :: ctxt - integer, intent(in), optional :: dev +contains + function psb_oacc_get_maybe_free_buffer() result(res) + logical :: res + res = oacc_do_maybe_free_buffer + end function psb_oacc_get_maybe_free_buffer - end subroutine psb_oacc_init + subroutine psb_oacc_set_maybe_free_buffer(val) + logical, intent(in) :: val + oacc_do_maybe_free_buffer = val + end subroutine psb_oacc_set_maybe_free_buffer - subroutine psb_oacc_exit() - integer :: res + subroutine psb_oacc_init(ctxt, dev) + type(psb_ctxt_type), intent(in) :: ctxt + integer, intent(in), optional :: dev + + end subroutine psb_oacc_init - end subroutine psb_oacc_exit + subroutine psb_oacc_exit() + integer :: res + + end subroutine psb_oacc_exit end module psb_oacc_env_mod diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index 16b45461..b80108ab 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -3,6 +3,8 @@ module psb_s_oacc_vect_mod use openacc use psb_const_mod use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod use psb_s_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod @@ -26,6 +28,8 @@ module psb_s_oacc_vect_mod procedure, pass(x) :: bld_x => s_oacc_bld_x procedure, pass(x) :: bld_mn => s_oacc_bld_mn procedure, pass(x) :: free => s_oacc_vect_free + procedure, pass(x) :: free_buffer => s_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => s_oacc_vect_maybe_free_buffer procedure, pass(x) :: ins_a => s_oacc_ins_a procedure, pass(x) :: ins_v => s_oacc_ins_v procedure, pass(x) :: is_host => s_oacc_is_host @@ -36,11 +40,13 @@ module psb_s_oacc_vect_mod procedure, pass(x) :: set_sync => s_oacc_set_sync procedure, pass(x) :: set_scal => s_oacc_set_scal + procedure, pass(x) :: new_buffer => s_oacc_new_buffer procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf + procedure, pass(x) :: gthzbuf => s_oacc_gthzbuf procedure, pass(y) :: sctb => s_oacc_sctb procedure, pass(y) :: sctb_x => s_oacc_sctb_x procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf + procedure, nopass :: device_wait => s_oacc_device_wait procedure, pass(x) :: get_size => s_oacc_get_size @@ -87,6 +93,11 @@ module psb_s_oacc_vect_mod contains + subroutine s_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine s_oacc_device_wait + subroutine s_oacc_absval1(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -181,13 +192,17 @@ contains !$acc parallel loop reduction(max:mx) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) - end do - sum = szero - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x(i)/mx)**2 end do - res = mx*sqrt(sum) + if (mx == szero) then + res = mx + else + sum = szero + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if end function s_inner_oacc_nrm2 end function s_oacc_nrm2 @@ -398,29 +413,44 @@ contains class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta class(psb_s_vect_oacc) :: y - integer(psb_ipk_) :: info - + integer(psb_ipk_) :: info, k + logical :: acc_done if (.not.allocated(y%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() + !$acc update device(y%combuf) + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. + end select - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) end do + end if - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) end do - end select + !$acc end parallel loop + end subroutine inner_sctb + end subroutine s_oacc_sctb_buf subroutine s_oacc_sctb_x(i, n, idx, x, beta, y) @@ -430,24 +460,41 @@ contains class(psb_i_base_vect_type) :: idx real(psb_spk_) :: beta, x(:) class(psb_s_vect_oacc) :: y - integer(psb_ipk_) :: info, ni + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 's_oacc_sctb_x') - return + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. + call y%set_dev() + end if end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if - if (y%is_host()) call y%sync() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + end subroutine s_oacc_sctb_x subroutine s_oacc_sctb(n, idx, x, beta, y) @@ -463,7 +510,6 @@ contains if (n == 0) return if (y%is_dev()) call y%sync() - !$acc parallel loop do i = 1, n y%v(idx(i)) = beta * y%v(idx(i)) + x(i) end do @@ -477,30 +523,48 @@ contains integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx class(psb_s_vect_oacc) :: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info,k + logical :: acc_done info = 0 + acc_done = .false. + if (.not.allocated(x%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if - select type(ii => idx) + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 's_oacc_gthzbuf') - return + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. end select - if (x%is_host()) call x%sync() + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine s_oacc_gthzbuf - + subroutine s_oacc_gthzv_x(i, n, idx, x, y) use psb_base_mod implicit none @@ -508,24 +572,41 @@ contains class(psb_i_base_vect_type):: idx real(psb_spk_) :: y(:) class(psb_s_vect_oacc):: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info, k + logical :: acc_done info = 0 - - select type(ii => idx) + acc_done = .false. + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 's_oacc_gthzv_x') - return + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if end select - - if (x%is_host()) call x%sync() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + real(psb_spk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine s_oacc_gthzv_x subroutine s_oacc_ins_v(n, irl, val, dupl, x, info) @@ -718,7 +799,7 @@ contains integer(psb_ipk_) :: info res = szero - !write(0,*) 'dot_v' +!!$ write(0,*) 'oacc_dot_v' select type(yy => y) type is (psb_s_base_vect_type) if (x%is_dev()) call x%sync() @@ -762,6 +843,17 @@ contains end function s_oacc_dot_a + subroutine s_oacc_new_buffer(n,x,info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + if (n /= psb_size(x%combuf)) then + call x%psb_s_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + end if + end subroutine s_oacc_new_buffer + subroutine s_oacc_sync_dev_space(x) implicit none class(psb_s_vect_oacc), intent(inout) :: x @@ -860,12 +952,33 @@ contains class(psb_s_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 - if (allocated(x%v)) then - if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - deallocate(x%v, stat=info) - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_s_base_vect_type%free(info) end subroutine s_oacc_vect_free + + subroutine s_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer())& + & call x%free_buffer(info) + end subroutine s_oacc_vect_maybe_free_buffer + + subroutine s_oacc_vect_free_buffer(x,info) + implicit none + class(psb_s_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_s_base_vect_type%free_buffer(info) + + end subroutine s_oacc_vect_free_buffer + function s_oacc_get_size(x) result(res) implicit none class(psb_s_vect_oacc), intent(inout) :: x diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 9e6bbb2d..86107c31 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -3,6 +3,8 @@ module psb_z_oacc_vect_mod use openacc use psb_const_mod use psb_error_mod + use psb_realloc_mod + use psb_oacc_env_mod use psb_z_vect_mod use psb_i_vect_mod use psb_i_oacc_vect_mod @@ -26,6 +28,8 @@ module psb_z_oacc_vect_mod procedure, pass(x) :: bld_x => z_oacc_bld_x procedure, pass(x) :: bld_mn => z_oacc_bld_mn procedure, pass(x) :: free => z_oacc_vect_free + procedure, pass(x) :: free_buffer => z_oacc_vect_free_buffer + procedure, pass(x) :: maybe_free_buffer => z_oacc_vect_maybe_free_buffer procedure, pass(x) :: ins_a => z_oacc_ins_a procedure, pass(x) :: ins_v => z_oacc_ins_v procedure, pass(x) :: is_host => z_oacc_is_host @@ -36,11 +40,13 @@ module psb_z_oacc_vect_mod procedure, pass(x) :: set_sync => z_oacc_set_sync procedure, pass(x) :: set_scal => z_oacc_set_scal + procedure, pass(x) :: new_buffer => z_oacc_new_buffer procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf + procedure, pass(x) :: gthzbuf => z_oacc_gthzbuf procedure, pass(y) :: sctb => z_oacc_sctb procedure, pass(y) :: sctb_x => z_oacc_sctb_x procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf + procedure, nopass :: device_wait => z_oacc_device_wait procedure, pass(x) :: get_size => z_oacc_get_size @@ -87,6 +93,11 @@ module psb_z_oacc_vect_mod contains + subroutine z_oacc_device_wait() + implicit none + call acc_wait_all() + end subroutine z_oacc_device_wait + subroutine z_oacc_absval1(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -181,13 +192,17 @@ contains !$acc parallel loop reduction(max:mx) do i = 1, n if (abs(x(i)) > mx) mx = abs(x(i)) - end do - sum = dzero - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x(i)/mx)**2 end do - res = mx*sqrt(sum) + if (mx == dzero) then + res = mx + else + sum = dzero + !$acc parallel loop reduction(+:sum) + do i = 1, n + sum = sum + abs(x(i)/mx)**2 + end do + res = mx*sqrt(sum) + end if end function z_inner_oacc_nrm2 end function z_oacc_nrm2 @@ -398,29 +413,44 @@ contains class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta class(psb_z_vect_oacc) :: y - integer(psb_ipk_) :: info - + integer(psb_ipk_) :: info, k + logical :: acc_done if (.not.allocated(y%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') return end if + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() + !$acc update device(y%combuf) + call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) + call y%set_dev() + acc_done = .true. + end select - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k) end do + end if - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: beta,x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) end do - end select + !$acc end parallel loop + end subroutine inner_sctb + end subroutine z_oacc_sctb_buf subroutine z_oacc_sctb_x(i, n, idx, x, beta, y) @@ -430,24 +460,41 @@ contains class(psb_i_base_vect_type) :: idx complex(psb_dpk_) :: beta, x(:) class(psb_z_vect_oacc) :: y - integer(psb_ipk_) :: info, ni + integer(psb_ipk_) :: info, ni, k + logical :: acc_done + acc_done = .false. select type(ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'z_oacc_sctb_x') - return + if (y%is_host()) call y%sync() + if (acc_is_present(x)) then + call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1)) + acc_done = .true. + call y%set_dev() + end if end select + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (y%is_dev()) call y%sync() + do k = 1, n + y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1) + end do + call y%set_host() + end if - if (y%is_host()) call y%sync() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() + contains + subroutine inner_sctb(n,x,beta,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: beta, x(:), y(:) + integer(psb_ipk_) :: k + !$acc parallel loop + do k = 1, n + y(idx(k)) = x(k) + beta *y(idx(k)) + end do + !$acc end parallel loop + end subroutine inner_sctb + end subroutine z_oacc_sctb_x subroutine z_oacc_sctb(n, idx, x, beta, y) @@ -463,7 +510,6 @@ contains if (n == 0) return if (y%is_dev()) call y%sync() - !$acc parallel loop do i = 1, n y%v(idx(i)) = beta * y%v(idx(i)) + x(i) end do @@ -477,30 +523,48 @@ contains integer(psb_ipk_) :: i, n class(psb_i_base_vect_type) :: idx class(psb_z_vect_oacc) :: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info,k + logical :: acc_done info = 0 + acc_done = .false. + if (.not.allocated(x%combuf)) then call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') return end if - select type(ii => idx) + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'z_oacc_gthzbuf') - return + if (x%is_host()) call x%sync() + call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1)) + acc_done = .true. end select - if (x%is_host()) call x%sync() + if (.not.acc_done) then + if (idx%is_dev()) call idx%sync() + if (x%is_dev()) call x%sync() + do k = 1, n + x%combuf(k+i-1) = x%v(idx%v(k+i-1)) + end do + end if - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine z_oacc_gthzbuf - + subroutine z_oacc_gthzv_x(i, n, idx, x, y) use psb_base_mod implicit none @@ -508,24 +572,41 @@ contains class(psb_i_base_vect_type):: idx complex(psb_dpk_) :: y(:) class(psb_z_vect_oacc):: x - integer(psb_ipk_) :: info + integer(psb_ipk_) :: info, k + logical :: acc_done info = 0 - - select type(ii => idx) + acc_done = .false. + select type (ii => idx) class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() - class default - call psb_errpush(info, 'z_oacc_gthzv_x') - return + if (x%is_host()) call x%sync() + if (acc_is_present(y)) then + call inner_gth(n,x%v,y(i:),ii%v(i:)) + acc_done=.true. + end if end select - - if (x%is_host()) call x%sync() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do + if (.not.acc_done) then + if (x%is_dev()) call x%sync() + if (idx%is_dev()) call idx%sync() + do k = 1, n + y(k+i-1) = x%v(idx%v(k+i-1)) + !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k) + end do + end if + contains + subroutine inner_gth(n,x,y,idx) + integer(psb_ipk_) :: n, idx(:) + complex(psb_dpk_) :: x(:), y(:) + integer(psb_ipk_) :: k + + !$acc parallel loop present(y) + do k = 1, n + y(k) = x(idx(k)) + end do + !$acc end parallel loop + !$acc update self(y) + end subroutine inner_gth end subroutine z_oacc_gthzv_x subroutine z_oacc_ins_v(n, irl, val, dupl, x, info) @@ -718,7 +799,7 @@ contains integer(psb_ipk_) :: info res = zzero - !write(0,*) 'dot_v' +!!$ write(0,*) 'oacc_dot_v' select type(yy => y) type is (psb_z_base_vect_type) if (x%is_dev()) call x%sync() @@ -762,6 +843,17 @@ contains end function z_oacc_dot_a + subroutine z_oacc_new_buffer(n,x,info) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(in) :: n + integer(psb_ipk_), intent(out) :: info + if (n /= psb_size(x%combuf)) then + call x%psb_z_base_vect_type%new_buffer(n,info) + !$acc enter data copyin(x%combuf) + end if + end subroutine z_oacc_new_buffer + subroutine z_oacc_sync_dev_space(x) implicit none class(psb_z_vect_oacc), intent(inout) :: x @@ -860,12 +952,33 @@ contains class(psb_z_vect_oacc), intent(inout) :: x integer(psb_ipk_), intent(out) :: info info = 0 - if (allocated(x%v)) then - if (acc_is_present(x%v)) call acc_delete_finalize(x%v) - deallocate(x%v, stat=info) - end if + if (acc_is_present(x%v)) call acc_delete_finalize(x%v) + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_z_base_vect_type%free(info) end subroutine z_oacc_vect_free + + subroutine z_oacc_vect_maybe_free_buffer(x,info) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (psb_oacc_get_maybe_free_buffer())& + & call x%free_buffer(info) + end subroutine z_oacc_vect_maybe_free_buffer + + subroutine z_oacc_vect_free_buffer(x,info) + implicit none + class(psb_z_vect_oacc), intent(inout) :: x + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf) + call x%psb_z_base_vect_type%free_buffer(info) + + end subroutine z_oacc_vect_free_buffer + function z_oacc_get_size(x) result(res) implicit none class(psb_z_vect_oacc), intent(inout) :: x From 096bce08c138df418648aef5fd5abeef1364c455 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Wed, 11 Sep 2024 12:43:46 +0200 Subject: [PATCH 38/39] Merged changes from V4 OpenACC --- openacc/psb_c_oacc_csr_mat_mod.F90 | 40 ++++++++++++++------- openacc/psb_c_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++--------- openacc/psb_c_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++----------- openacc/psb_d_oacc_csr_mat_mod.F90 | 40 ++++++++++++++------- openacc/psb_d_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++--------- openacc/psb_d_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++----------- openacc/psb_s_oacc_csr_mat_mod.F90 | 40 ++++++++++++++------- openacc/psb_s_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++--------- openacc/psb_s_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++----------- openacc/psb_z_oacc_csr_mat_mod.F90 | 40 ++++++++++++++------- openacc/psb_z_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++--------- openacc/psb_z_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++----------- 12 files changed, 376 insertions(+), 200 deletions(-) diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90 index 59794335..94edc5e9 100644 --- a/openacc/psb_c_oacc_csr_mat_mod.F90 +++ b/openacc/psb_c_oacc_csr_mat_mod.F90 @@ -139,9 +139,14 @@ contains class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irp)) call acc_delete_finalize(a%irp) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp) return end subroutine c_oacc_csr_free_dev_space @@ -246,9 +251,15 @@ contains subroutine c_oacc_csr_sync_dev_space(a) implicit none class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irp)) call acc_create(a%irp) + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irp)>0) call acc_create(a%irp) end subroutine c_oacc_csr_sync_dev_space subroutine c_oacc_csr_sync(a) @@ -258,14 +269,19 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irp) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irp)>0) call acc_update_self(a%irp) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irp) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irp)>0) call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine c_oacc_csr_sync diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90 index f0559d26..f0b9779b 100644 --- a/openacc/psb_c_oacc_ell_mat_mod.F90 +++ b/openacc/psb_c_oacc_ell_mat_mod.F90 @@ -138,11 +138,15 @@ contains class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) return end subroutine c_oacc_ell_free_dev_space @@ -177,10 +181,15 @@ contains implicit none class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) end subroutine c_oacc_ell_sync_dev_space function c_oacc_ell_is_host(a) result(res) @@ -241,16 +250,21 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine c_oacc_ell_sync diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90 index 93436224..98c6a2ee 100644 --- a/openacc/psb_c_oacc_hll_mat_mod.F90 +++ b/openacc/psb_c_oacc_hll_mat_mod.F90 @@ -138,12 +138,16 @@ contains class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs) return end subroutine c_oacc_hll_free_dev_space @@ -231,11 +235,16 @@ contains implicit none class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) - if (allocated(a%hkoffs)) call acc_create(a%hkoffs) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) end subroutine c_oacc_hll_sync_dev_space @@ -246,18 +255,23 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) - call acc_update_self(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) - call acc_update_device(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine c_oacc_hll_sync diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index 08b75575..a3119b64 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -139,9 +139,14 @@ contains class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irp)) call acc_delete_finalize(a%irp) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp) return end subroutine d_oacc_csr_free_dev_space @@ -246,9 +251,15 @@ contains subroutine d_oacc_csr_sync_dev_space(a) implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irp)) call acc_create(a%irp) + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irp)>0) call acc_create(a%irp) end subroutine d_oacc_csr_sync_dev_space subroutine d_oacc_csr_sync(a) @@ -258,14 +269,19 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irp) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irp)>0) call acc_update_self(a%irp) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irp) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irp)>0) call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine d_oacc_csr_sync diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 3e25f576..3932e286 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -138,11 +138,15 @@ contains class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) return end subroutine d_oacc_ell_free_dev_space @@ -177,10 +181,15 @@ contains implicit none class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) end subroutine d_oacc_ell_sync_dev_space function d_oacc_ell_is_host(a) result(res) @@ -241,16 +250,21 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine d_oacc_ell_sync diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 index 084987e5..fd0fe54f 100644 --- a/openacc/psb_d_oacc_hll_mat_mod.F90 +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -138,12 +138,16 @@ contains class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs) return end subroutine d_oacc_hll_free_dev_space @@ -231,11 +235,16 @@ contains implicit none class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) - if (allocated(a%hkoffs)) call acc_create(a%hkoffs) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) end subroutine d_oacc_hll_sync_dev_space @@ -246,18 +255,23 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) - call acc_update_self(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) - call acc_update_device(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine d_oacc_hll_sync diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90 index 16668d25..5eaf80f7 100644 --- a/openacc/psb_s_oacc_csr_mat_mod.F90 +++ b/openacc/psb_s_oacc_csr_mat_mod.F90 @@ -139,9 +139,14 @@ contains class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irp)) call acc_delete_finalize(a%irp) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp) return end subroutine s_oacc_csr_free_dev_space @@ -246,9 +251,15 @@ contains subroutine s_oacc_csr_sync_dev_space(a) implicit none class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irp)) call acc_create(a%irp) + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irp)>0) call acc_create(a%irp) end subroutine s_oacc_csr_sync_dev_space subroutine s_oacc_csr_sync(a) @@ -258,14 +269,19 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irp) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irp)>0) call acc_update_self(a%irp) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irp) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irp)>0) call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine s_oacc_csr_sync diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90 index dcfc1850..56775879 100644 --- a/openacc/psb_s_oacc_ell_mat_mod.F90 +++ b/openacc/psb_s_oacc_ell_mat_mod.F90 @@ -138,11 +138,15 @@ contains class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) return end subroutine s_oacc_ell_free_dev_space @@ -177,10 +181,15 @@ contains implicit none class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) end subroutine s_oacc_ell_sync_dev_space function s_oacc_ell_is_host(a) result(res) @@ -241,16 +250,21 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine s_oacc_ell_sync diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90 index 0e7362f2..997433a1 100644 --- a/openacc/psb_s_oacc_hll_mat_mod.F90 +++ b/openacc/psb_s_oacc_hll_mat_mod.F90 @@ -138,12 +138,16 @@ contains class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs) return end subroutine s_oacc_hll_free_dev_space @@ -231,11 +235,16 @@ contains implicit none class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) - if (allocated(a%hkoffs)) call acc_create(a%hkoffs) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) end subroutine s_oacc_hll_sync_dev_space @@ -246,18 +255,23 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) - call acc_update_self(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) - call acc_update_device(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine s_oacc_hll_sync diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90 index fdb59b4b..ed92373d 100644 --- a/openacc/psb_z_oacc_csr_mat_mod.F90 +++ b/openacc/psb_z_oacc_csr_mat_mod.F90 @@ -139,9 +139,14 @@ contains class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irp)) call acc_delete_finalize(a%irp) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp) return end subroutine z_oacc_csr_free_dev_space @@ -246,9 +251,15 @@ contains subroutine z_oacc_csr_sync_dev_space(a) implicit none class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irp)) call acc_create(a%irp) + + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irp)>0) call acc_create(a%irp) end subroutine z_oacc_csr_sync_dev_space subroutine z_oacc_csr_sync(a) @@ -258,14 +269,19 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irp) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irp)>0) call acc_update_self(a%irp) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irp) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irp)>0) call acc_update_device(a%irp) end if call tmpa%set_sync() end subroutine z_oacc_csr_sync diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90 index 9d09d43d..d494922f 100644 --- a/openacc/psb_z_oacc_ell_mat_mod.F90 +++ b/openacc/psb_z_oacc_ell_mat_mod.F90 @@ -138,11 +138,15 @@ contains class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) return end subroutine z_oacc_ell_free_dev_space @@ -177,10 +181,15 @@ contains implicit none class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) end subroutine z_oacc_ell_sync_dev_space function z_oacc_ell_is_host(a) result(res) @@ -241,16 +250,21 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) end if call tmpa%set_sync() end subroutine z_oacc_ell_sync diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90 index 9eb7d08c..07739348 100644 --- a/openacc/psb_z_oacc_hll_mat_mod.F90 +++ b/openacc/psb_z_oacc_hll_mat_mod.F90 @@ -138,12 +138,16 @@ contains class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a integer(psb_ipk_) :: info - if (allocated(a%val)) call acc_delete_finalize(a%val) - if (allocated(a%ja)) call acc_delete_finalize(a%ja) - if (allocated(a%irn)) call acc_delete_finalize(a%irn) - if (allocated(a%idiag)) call acc_delete_finalize(a%idiag) - if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs) - + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_delete_finalize(a%val) + if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja) + if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn) + if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs) return end subroutine z_oacc_hll_free_dev_space @@ -231,11 +235,16 @@ contains implicit none class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a - if (allocated(a%val)) call acc_create(a%val) - if (allocated(a%ja)) call acc_create(a%ja) - if (allocated(a%irn)) call acc_create(a%irn) - if (allocated(a%idiag)) call acc_create(a%idiag) - if (allocated(a%hkoffs)) call acc_create(a%hkoffs) + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! + if (psb_size(a%val)>0) call acc_create(a%val) + if (psb_size(a%ja)>0) call acc_create(a%ja) + if (psb_size(a%irn)>0) call acc_create(a%irn) + if (psb_size(a%idiag)>0) call acc_create(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs) end subroutine z_oacc_hll_sync_dev_space @@ -246,18 +255,23 @@ contains integer(psb_ipk_) :: info tmpa => a + ! + ! Note: at least on GNU, if an array is allocated + ! but with size 0, then CREATE,UPDATE and DELETE + ! will fail + ! if (a%is_dev()) then - call acc_update_self(a%val) - call acc_update_self(a%ja) - call acc_update_self(a%irn) - call acc_update_self(a%idiag) - call acc_update_self(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_self(a%val) + if (psb_size(a%ja)>0) call acc_update_self(a%ja) + if (psb_size(a%irn)>0) call acc_update_self(a%irn) + if (psb_size(a%idiag)>0) call acc_update_self(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs) else if (a%is_host()) then - call acc_update_device(a%val) - call acc_update_device(a%ja) - call acc_update_device(a%irn) - call acc_update_device(a%idiag) - call acc_update_device(a%hkoffs) + if (psb_size(a%val)>0) call acc_update_device(a%val) + if (psb_size(a%ja)>0) call acc_update_device(a%ja) + if (psb_size(a%irn)>0) call acc_update_device(a%irn) + if (psb_size(a%idiag)>0) call acc_update_device(a%idiag) + if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs) end if call tmpa%set_sync() end subroutine z_oacc_hll_sync From e5504ddddc39bb4d2f6dbf1a1fcc6bcb6af58653 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Thu, 12 Sep 2024 15:08:21 +0200 Subject: [PATCH 39/39] Fix memory traffic in GTH/SCT --- openacc/psb_c_oacc_vect_mod.F90 | 7 ++++--- openacc/psb_d_oacc_vect_mod.F90 | 7 ++++--- openacc/psb_i_oacc_vect_mod.F90 | 7 ++++--- openacc/psb_l_oacc_vect_mod.F90 | 7 ++++--- openacc/psb_s_oacc_vect_mod.F90 | 7 ++++--- openacc/psb_z_oacc_vect_mod.F90 | 7 ++++--- 6 files changed, 24 insertions(+), 18 deletions(-) diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90 index 4e2cca9e..e479f58d 100644 --- a/openacc/psb_c_oacc_vect_mod.F90 +++ b/openacc/psb_c_oacc_vect_mod.F90 @@ -425,7 +425,6 @@ contains class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - !$acc update device(y%combuf) call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) call y%set_dev() acc_done = .true. @@ -444,6 +443,7 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -488,6 +488,7 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_spk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -561,7 +562,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine c_oacc_gthzbuf @@ -605,7 +606,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine c_oacc_gthzv_x diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 80ac35f7..7fd2a441 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -425,7 +425,6 @@ contains class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - !$acc update device(y%combuf) call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) call y%set_dev() acc_done = .true. @@ -444,6 +443,7 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -488,6 +488,7 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_dpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -561,7 +562,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine d_oacc_gthzbuf @@ -605,7 +606,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine d_oacc_gthzv_x diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90 index cfd0c210..455453a1 100644 --- a/openacc/psb_i_oacc_vect_mod.F90 +++ b/openacc/psb_i_oacc_vect_mod.F90 @@ -79,7 +79,6 @@ contains class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - !$acc update device(y%combuf) call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) call y%set_dev() acc_done = .true. @@ -98,6 +97,7 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -142,6 +142,7 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_ipk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -215,7 +216,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine i_oacc_gthzbuf @@ -259,7 +260,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine i_oacc_gthzv_x diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90 index 5526796f..d35e9141 100644 --- a/openacc/psb_l_oacc_vect_mod.F90 +++ b/openacc/psb_l_oacc_vect_mod.F90 @@ -81,7 +81,6 @@ contains class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - !$acc update device(y%combuf) call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) call y%set_dev() acc_done = .true. @@ -100,6 +99,7 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -144,6 +144,7 @@ contains integer(psb_ipk_) :: n, idx(:) integer(psb_lpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -217,7 +218,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine l_oacc_gthzbuf @@ -261,7 +262,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine l_oacc_gthzv_x diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90 index b80108ab..87eeccea 100644 --- a/openacc/psb_s_oacc_vect_mod.F90 +++ b/openacc/psb_s_oacc_vect_mod.F90 @@ -425,7 +425,6 @@ contains class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - !$acc update device(y%combuf) call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) call y%set_dev() acc_done = .true. @@ -444,6 +443,7 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -488,6 +488,7 @@ contains integer(psb_ipk_) :: n, idx(:) real(psb_spk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -561,7 +562,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine s_oacc_gthzbuf @@ -605,7 +606,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine s_oacc_gthzv_x diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90 index 86107c31..0fe1adaa 100644 --- a/openacc/psb_z_oacc_vect_mod.F90 +++ b/openacc/psb_z_oacc_vect_mod.F90 @@ -425,7 +425,6 @@ contains class is (psb_i_vect_oacc) if (ii%is_host()) call ii%sync() if (y%is_host()) call y%sync() - !$acc update device(y%combuf) call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1)) call y%set_dev() acc_done = .true. @@ -444,6 +443,7 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta,x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -488,6 +488,7 @@ contains integer(psb_ipk_) :: n, idx(:) complex(psb_dpk_) :: beta, x(:), y(:) integer(psb_ipk_) :: k + !$acc update device(x(1:n)) async !$acc parallel loop do k = 1, n y(idx(k)) = x(k) + beta *y(idx(k)) @@ -561,7 +562,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine z_oacc_gthzbuf @@ -605,7 +606,7 @@ contains y(k) = x(idx(k)) end do !$acc end parallel loop - !$acc update self(y) + !$acc update self(y(1:n)) async end subroutine inner_gth end subroutine z_oacc_gthzv_x