From 2b8671fba63ed5e4d6266c7a6c1a7b89ed914341 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9ophane=20Loloum?=
Date: Mon, 8 Jul 2024 10:14:42 +0200
Subject: [PATCH 01/86] oacc : d_vect + i_vect + d_csr
---
openacc/Makefile | 48 ++
openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 61 ++
openacc/psb_d_oacc_csr_mat_mod.F90 | 185 +++++
openacc/psb_d_oacc_vect_mod.F90 | 966 ++++++++++++++++++++++++
openacc/psb_i_oacc_vect_mod.F90 | 455 +++++++++++
openacc/psb_oacc_mod.F90 | 7 +
test/openacc/Makefile | 53 ++
test/openacc/test.F90 | 617 +++++++++++++++
test/openacc/timers.c | 97 +++
test/openacc/vectoacc.F90 | 85 +++
10 files changed, 2574 insertions(+)
create mode 100644 openacc/Makefile
create mode 100644 openacc/impl/psb_d_oacc_csr_vect_mv.F90
create mode 100644 openacc/psb_d_oacc_csr_mat_mod.F90
create mode 100644 openacc/psb_d_oacc_vect_mod.F90
create mode 100644 openacc/psb_i_oacc_vect_mod.F90
create mode 100644 openacc/psb_oacc_mod.F90
create mode 100644 test/openacc/Makefile
create mode 100644 test/openacc/test.F90
create mode 100644 test/openacc/timers.c
create mode 100644 test/openacc/vectoacc.F90
diff --git a/openacc/Makefile b/openacc/Makefile
new file mode 100644
index 00000000..3590249c
--- /dev/null
+++ b/openacc/Makefile
@@ -0,0 +1,48 @@
+.SUFFIXES:
+.SUFFIXES: .F90 .f90 .o .s .c
+
+# Compilers and flags
+CC=mpicc
+FC=mpif90
+FCOPT=-O0 -march=native
+OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
+
+# Directories
+LIBDIR=../lib
+INCDIR=../include
+MODDIR=../modules
+IMPLDIR=./impl # Adding the impl directory
+
+# Include and library paths
+INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR)
+LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
+
+# Source files
+FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
+ psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
+ impl/psb_d_oacc_csr_vect_mv.o
+
+# Library name
+LIBNAME=libpsb_openacc.a
+
+# Rules
+all: $(LIBNAME)
+
+$(LIBNAME): $(FOBJS)
+ ar cr $(LIBNAME) $(FOBJS)
+ /bin/cp -p $(LIBNAME) $(LIBDIR)
+
+clean:
+ /bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o
+
+.f90.o:
+ $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
+
+.c.o:
+ $(CC) -c $< -o $@
+
+.F90.o:
+ $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
+
+.F90.s:
+ $(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@
diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
new file mode 100644
index 00000000..f0394591
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
@@ -0,0 +1,61 @@
+subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_vect_mv
+ implicit none
+
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+
+ if ((n /= size(x%v)) .or. (n /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+
+contains
+
+ subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), irp(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ real(psb_dpk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = irp(i), irp(i + 1) - 1
+ tmp = tmp + val(j) * x(ja(j))
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+end subroutine psb_d_oacc_csr_vect_mv
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
new file mode 100644
index 00000000..c6b7e3d0
--- /dev/null
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -0,0 +1,185 @@
+module psb_d_oacc_csr_mat_mod
+
+ use iso_c_binding
+ use psb_d_mat_mod
+ use psb_d_oacc_vect_mod
+ use oaccsparse_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, pass(a) :: all => d_oacc_csr_all
+ procedure, pass(a) :: is_host => d_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => d_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
+ procedure, pass(a) :: sync => d_oacc_csr_sync
+ procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
+ end type psb_d_oacc_csr_sparse_mat
+
+ interface
+ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_vect_mv
+ end interface
+
+contains
+
+ subroutine d_oacc_csr_all(m, n, nz, a, info)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n, nz
+ class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val) finalize
+ deallocate(a%val, stat=info)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja) finalize
+ deallocate(a%ja, stat=info)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp) finalize
+ deallocate(a%irp, stat=info)
+ end if
+
+ call a%set_nrows(m)
+ call a%set_ncols(n)
+
+ allocate(a%val(nz),stat=info)
+ allocate(a%ja(nz),stat=info)
+ allocate(a%irp(m+1),stat=info)
+ if (info == 0) call a%set_host()
+ if (info == 0) call a%sync_space()
+ end subroutine d_oacc_csr_all
+
+ function d_oacc_csr_is_host(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function d_oacc_csr_is_host
+
+ function d_oacc_csr_is_sync(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function d_oacc_csr_is_sync
+
+ function d_oacc_csr_is_dev(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function d_oacc_csr_is_dev
+
+ subroutine d_oacc_csr_set_host(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine d_oacc_csr_set_host
+
+ subroutine d_oacc_csr_set_sync(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine d_oacc_csr_set_sync
+
+ subroutine d_oacc_csr_set_dev(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine d_oacc_csr_set_dev
+
+ subroutine d_oacc_csr_sync_space(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ if (allocated(a%val)) then
+ call d_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ call i_oacc_create_dev(a%irp)
+ end if
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+ end subroutine d_oacc_csr_sync_space
+
+ subroutine d_oacc_csr_sync(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a
+ class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call d_oacc_csr_to_host(a%val)
+ call i_oacc_csr_to_host(a%ja)
+ call i_oacc_csr_to_host(a%irp)
+ else if (a%is_host()) then
+ call d_oacc_csr_to_dev(a%val)
+ call i_oacc_csr_to_dev(a%ja)
+ call i_oacc_csr_to_dev(a%irp)
+ end if
+ call tmpa%set_sync()
+ end subroutine d_oacc_csr_sync
+
+ subroutine d_oacc_csr_to_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_csr_to_dev
+
+ subroutine d_oacc_csr_to_host(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_csr_to_host
+
+ subroutine i_oacc_csr_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_csr_to_dev
+
+ subroutine i_oacc_csr_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_csr_to_host
+
+
+end module psb_d_oacc_csr_mat_mod
+
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
new file mode 100644
index 00000000..ac5428f3
--- /dev/null
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -0,0 +1,966 @@
+module psb_d_oacc_vect_mod
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_d_vect_mod
+ use psb_i_oacc_vect_mod
+ use psb_i_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc
+ integer :: state = is_host
+
+ contains
+ procedure, pass(x) :: get_nrows => d_oacc_get_nrows
+ procedure, nopass :: get_fmt => d_oacc_get_fmt
+
+ procedure, pass(x) :: all => d_oacc_vect_all
+ procedure, pass(x) :: zero => d_oacc_zero
+ procedure, pass(x) :: asb_m => d_oacc_asb_m
+ procedure, pass(x) :: sync => d_oacc_sync
+ procedure, pass(x) :: sync_space => d_oacc_sync_space
+ procedure, pass(x) :: bld_x => d_oacc_bld_x
+ procedure, pass(x) :: bld_mn => d_oacc_bld_mn
+ procedure, pass(x) :: free => d_oacc_vect_free
+ procedure, pass(x) :: ins_a => d_oacc_ins_a
+ procedure, pass(x) :: ins_v => d_oacc_ins_v
+ procedure, pass(x) :: is_host => d_oacc_is_host
+ procedure, pass(x) :: is_dev => d_oacc_is_dev
+ procedure, pass(x) :: is_sync => d_oacc_is_sync
+ procedure, pass(x) :: set_host => d_oacc_set_host
+ procedure, pass(x) :: set_dev => d_oacc_set_dev
+ procedure, pass(x) :: set_sync => d_oacc_set_sync
+ procedure, pass(x) :: set_scal => d_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
+ procedure, pass(y) :: sctb => d_oacc_sctb
+ procedure, pass(y) :: sctb_x => d_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => oacc_get_size
+ procedure, pass(x) :: dot_v => d_oacc_vect_dot
+ procedure, pass(x) :: dot_a => d_oacc_dot_a
+ procedure, pass(y) :: axpby_v => d_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => d_oacc_axpby_a
+ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
+ procedure, pass(y) :: mlt_v => d_oacc_mlt_v
+ procedure, pass(y) :: mlt_a => d_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
+ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
+ procedure, pass(x) :: scal => d_oacc_scal
+ procedure, pass(x) :: nrm2 => d_oacc_nrm2
+ procedure, pass(x) :: amax => d_oacc_amax
+ procedure, pass(x) :: asum => d_oacc_asum
+ procedure, pass(x) :: absval1 => d_oacc_absval1
+ procedure, pass(x) :: absval2 => d_oacc_absval2
+
+ end type psb_d_vect_oacc
+
+ real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
+
+contains
+
+ subroutine d_oacc_absval1(x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: n, i
+
+ if (x%is_host()) call x%sync_space()
+ n = size(x%v)
+ !$acc parallel loop
+ do i = 1, n
+ x%v(i) = abs(x%v(i))
+ end do
+ call x%set_dev()
+ end subroutine d_oacc_absval1
+
+ subroutine d_oacc_absval2(x, y)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+
+ n = min(size(x%v), size(y%v))
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+ !$acc parallel loop
+ do i = 1, n
+ yy%v(i) = abs(x%v(i))
+ end do
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call x%psb_d_base_vect_type%absval(y)
+ end select
+ end subroutine d_oacc_absval2
+
+
+
+ subroutine d_oacc_scal(alpha, x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ real(psb_dpk_), intent(in) :: alpha
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x%v)
+ x%v(i) = alpha * x%v(i)
+ end do
+ call x%set_dev()
+ end subroutine d_oacc_scal
+
+ function d_oacc_nrm2(n, x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + x%v(i) * x%v(i)
+ end do
+ res = sqrt(sum)
+ end function d_oacc_nrm2
+
+ function d_oacc_amax(n, x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: max_val
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (x%v(i) > max_val) max_val = x%v(i)
+ end do
+ res = max_val
+ end function d_oacc_amax
+
+ function d_oacc_asum(n, x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))
+ end do
+ res = sum
+ end function d_oacc_asum
+
+
+ subroutine d_oacc_mlt_v(x, y, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ type is (psb_d_base_vect_type)
+ if (y%is_dev()) call y%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
+ end subroutine d_oacc_mlt_v
+
+ subroutine d_oacc_mlt_a(x, y, info)
+ use psi_serial_mod
+ implicit none
+ real(psb_dpk_), intent(in) :: x(:)
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (y%is_dev()) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ y%v(i) = y%v(i) * x(i)
+ end do
+ call y%set_host()
+ end subroutine d_oacc_mlt_a
+
+ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+ use psi_serial_mod
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:)
+ real(psb_dpk_), intent(in) :: y(:)
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (z%is_dev()) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end subroutine d_oacc_mlt_a_2
+
+ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psi_serial_mod
+ use psb_string_mod
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
+
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+
+ info = 0
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (xx%is_host()) call xx%sync_space()
+ if (yy%is_host()) call yy%sync_space()
+ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ end do
+ call z%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync_space()
+ if (yy%is_dev()) call yy%sync()
+ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end select
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end select
+ end subroutine d_oacc_mlt_v_2
+
+
+ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ real(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, i
+
+ info = psb_success_
+
+ select type(xx => x)
+ type is (psb_d_vect_oacc)
+ if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(y%v)
+ if ((nx < m) .or. (ny < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
+ end do
+ end if
+ call y%set_dev()
+ class default
+ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
+ call y%axpby(m, alpha, x%v, beta, info)
+ end select
+ end subroutine d_oacc_axpby_v
+
+ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ real(psb_dpk_), intent(in) :: x(:)
+ class(psb_d_vect_oacc), intent(inout) :: y
+ real(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i
+
+ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * x(i) + beta * y%v(i)
+ end do
+ call y%set_host()
+ end subroutine d_oacc_axpby_a
+
+ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, nz, i
+ logical :: gpu_done
+
+ info = psb_success_
+ gpu_done = .false.
+
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type(yy => y)
+ class is (psb_d_vect_oacc)
+ select type(zz => z)
+ class is (psb_d_vect_oacc)
+ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
+ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
+ end select
+ end select
+ end select
+
+ if (.not. gpu_done) then
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
+ end if
+ end subroutine d_oacc_abgdxyz
+
+
+ subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ real(psb_dpk_) :: beta
+ class(psb_d_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine d_oacc_sctb_buf
+
+ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ real(psb_dpk_) :: beta, x(:)
+ class(psb_d_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'd_oacc_sctb_x')
+ return
+ end select
+
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+
+ call y%set_dev()
+ end subroutine d_oacc_sctb_x
+
+
+
+ subroutine d_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ real(psb_dpk_) :: beta, x(:)
+ class(psb_d_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine d_oacc_sctb
+
+
+ subroutine d_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_d_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'd_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine d_oacc_gthzbuf
+
+ subroutine d_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ real(psb_dpk_) :: y(:)
+ class(psb_d_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'd_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine d_oacc_gthzv_x
+
+ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_d_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_d_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space(info)
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ x%v(virl%v(i)) = vval%v(i)
+ end do
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space(info)
+ end select
+ select type(vval => val)
+ type is (psb_d_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine d_oacc_ins_v
+
+
+
+ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ real(psb_dpk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine d_oacc_ins_a
+
+
+
+ subroutine d_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine d_oacc_bld_mn
+
+
+ subroutine d_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ real(psb_dpk_), intent(in) :: this(:)
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'd_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine d_oacc_bld_x
+
+
+ subroutine d_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_d_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_d_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ end if
+ end subroutine d_oacc_asb_m
+
+
+
+ subroutine d_oacc_set_scal(x, val, first, last)
+ class(psb_d_vect_oacc), intent(inout) :: x
+ real(psb_dpk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine d_oacc_set_scal
+
+
+
+ subroutine d_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(dzero)
+ end subroutine d_oacc_zero
+
+ function d_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function d_oacc_get_nrows
+
+ function d_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "dOACC"
+
+ end function d_oacc_get_fmt
+
+ function d_oacc_vect_dot(n, x, y) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ real(psb_dpk_), external :: ddot
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ res = dzero
+
+ select type(yy => y)
+ type is (psb_d_base_vect_type)
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, x%v, 1, yy%v, 1)
+ type is (psb_d_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+
+ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
+ do i = 1, n
+ res = res + x%v(i) * yy%v(i)
+ end do
+ !$acc end parallel loop
+
+ class default
+ call x%sync()
+ res = y%dot(n, x%v)
+ end select
+
+ end function d_oacc_vect_dot
+
+
+
+
+ function d_oacc_dot_a(n, x, y) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ real(psb_dpk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ real(psb_dpk_), external :: ddot
+
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, y, 1, x%v, 1)
+
+ end function d_oacc_dot_a
+
+ ! subroutine d_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_d_vect_oacc), intent(inout) :: x
+ ! real(psb_dpk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine d_oacc_set_vect
+
+ subroutine d_oacc_to_dev(v)
+ implicit none
+ real(psb_dpk_) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_to_dev
+
+ subroutine d_oacc_to_host(v)
+ implicit none
+ real(psb_dpk_) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_to_host
+
+ subroutine d_oacc_sync_space(x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ if (allocated(x%v)) then
+ call d_oacc_create_dev(x%v)
+ end if
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+ end subroutine d_oacc_sync_space
+
+ subroutine d_oacc_sync(x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ if (x%is_dev()) then
+ call d_oacc_to_host(x%v)
+ end if
+ if (x%is_host()) then
+ call d_oacc_to_dev(x%v)
+ end if
+ call x%set_sync()
+ end subroutine d_oacc_sync
+
+ subroutine d_oacc_set_host(x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+
+ x%state = is_host
+ end subroutine d_oacc_set_host
+
+ subroutine d_oacc_set_dev(x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+
+ x%state = is_dev
+ end subroutine d_oacc_set_dev
+
+ subroutine d_oacc_set_sync(x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+
+ x%state = is_sync
+ end subroutine d_oacc_set_sync
+
+ function d_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function d_oacc_is_dev
+
+ function d_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function d_oacc_is_host
+
+ function d_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function d_oacc_is_sync
+
+ subroutine d_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_d_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'd_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine d_oacc_vect_all
+
+
+ subroutine d_oacc_vect_free(x, info)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine d_oacc_vect_free
+
+ function oacc_get_size(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function oacc_get_size
+
+
+ subroutine initialize(N)
+ integer(psb_ipk_) :: N
+ integer(psb_ipk_) :: i
+ allocate(v1(N),v2(N),p(N))
+ !$acc enter data create(v1,v2,p)
+ !$acc parallel
+ !$acc loop
+ do i=1,n
+ v1(i) = i
+ v2(i) = n+i
+ end do
+ !$acc end parallel
+ end subroutine initialize
+ subroutine finalize_dev()
+ !$acc exit data delete(v1,v2,p)
+ end subroutine finalize_dev
+ subroutine finalize_host()
+ deallocate(v1,v2,p)
+ end subroutine finalize_host
+ subroutine to_dev()
+ !$acc update device(v1,v2)
+ end subroutine to_dev
+ subroutine to_host()
+ !$acc update self(v1,v2)
+ end subroutine to_host
+ function d_dot(N) result(res)
+ real(kind(1.d0)) :: res
+ integer(psb_ipk_) :: i,N
+ real(kind(1.d0)) :: t1,t2,t3
+ res = 0.0d0
+ !$acc parallel
+ !$acc loop reduction(+:res)
+ do i=1,N
+ res = res + v1(i) * v2(i)
+ end do
+ !$acc end parallel
+
+ end function d_dot
+ function h_dot(N) result(res)
+ integer(psb_ipk_) :: i,N
+ real(kind(1.d0)) :: t1,t2,t3,res
+ res = 0.0d0
+ do i=1,N
+ res = res + v1(i) * v2(i)
+ end do
+ end function h_dot
+
+end module psb_d_oacc_vect_mod
\ No newline at end of file
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
new file mode 100644
index 00000000..70fc325e
--- /dev/null
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -0,0 +1,455 @@
+module psb_i_oacc_vect_mod
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_i_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc
+ integer :: state = is_host
+ contains
+ procedure, pass(x) :: get_nrows => i_oacc_get_nrows
+ procedure, nopass :: get_fmt => i_oacc_get_fmt
+
+ procedure, pass(x) :: all => i_oacc_all
+ procedure, pass(x) :: zero => i_oacc_zero
+ procedure, pass(x) :: asb_m => i_oacc_asb_m
+ procedure, pass(x) :: sync => i_oacc_sync
+ procedure, pass(x) :: sync_space => i_oacc_sync_space
+ procedure, pass(x) :: bld_x => i_oacc_bld_x
+ procedure, pass(x) :: bld_mn => i_oacc_bld_mn
+ procedure, pass(x) :: free => i_oacc_free
+ procedure, pass(x) :: ins_a => i_oacc_ins_a
+ procedure, pass(x) :: ins_v => i_oacc_ins_v
+ procedure, pass(x) :: is_host => i_oacc_is_host
+ procedure, pass(x) :: is_dev => i_oacc_is_dev
+ procedure, pass(x) :: is_sync => i_oacc_is_sync
+ procedure, pass(x) :: set_host => i_oacc_set_host
+ procedure, pass(x) :: set_dev => i_oacc_set_dev
+ procedure, pass(x) :: set_sync => i_oacc_set_sync
+ procedure, pass(x) :: set_scal => i_oacc_set_scal
+ procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
+ procedure, pass(y) :: sctb => i_oacc_sctb
+ procedure, pass(y) :: sctb_x => i_oacc_sctb_x
+ procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf
+ procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
+
+ final :: i_oacc_vect_finalize
+ end type psb_i_vect_oacc
+
+ public :: psb_i_vect_oacc_
+ private :: constructor
+ interface psb_i_vect_oacc_
+ module procedure constructor
+ end interface psb_i_vect_oacc_
+
+contains
+
+ function constructor(x) result(this)
+ integer(psb_ipk_) :: x(:)
+ type(psb_i_vect_oacc) :: this
+ integer(psb_ipk_) :: info
+
+ this%v = x
+ call this%asb(size(x), info)
+ end function constructor
+
+
+ subroutine i_oacc_gthzv_x(i, n, idx, x, y)
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_ipk_) :: y(:)
+ class(psb_i_vect_oacc) :: x
+ integer :: info
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine i_oacc_gthzv_x
+
+ subroutine i_oacc_gthzbuf(i, n, idx, x)
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_i_vect_oacc) :: x
+ integer :: info
+
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine i_oacc_gthzbuf
+
+ subroutine i_oacc_sctb(n, idx, x, beta, y)
+ implicit none
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_ipk_) :: beta, x(:)
+ class(psb_i_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer :: i
+
+ if (n == 0) return
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+ end subroutine i_oacc_sctb
+
+ subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_ipk_) :: beta, x(:)
+ class(psb_i_vect_oacc) :: y
+ integer :: info
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ if (y%is_host()) call y%sync_space(info)
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + x(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+ end select
+ end subroutine i_oacc_sctb_x
+
+ subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_ipk_) :: beta
+ class(psb_i_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%v)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%v(i)
+ end do
+ end subroutine i_oacc_sctb_buf
+
+ subroutine i_oacc_set_host(x)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ x%state = is_host
+ end subroutine i_oacc_set_host
+
+ subroutine i_oacc_set_sync(x)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ x%state = is_sync
+ end subroutine i_oacc_set_sync
+
+ subroutine i_oacc_set_dev(x)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ x%state = is_dev
+ end subroutine i_oacc_set_dev
+
+ subroutine i_oacc_set_scal(x, val, first, last)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+
+ first_ = 1
+ last_ = size(x%v)
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(size(x%v), last)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ call x%set_dev()
+ end subroutine i_oacc_set_scal
+
+ function i_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function i_oacc_is_host
+
+ function i_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function i_oacc_is_dev
+
+ function i_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function i_oacc_is_sync
+
+ subroutine i_oacc_free(x, info)
+ use psb_error_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (allocated(x%v)) deallocate(x%v, stat=info)
+ if (info /= 0) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, 'i_oacc_free')
+ end if
+ call x%set_sync()
+ end subroutine i_oacc_free
+
+ subroutine i_oacc_ins_a(n, irl, val, dupl, x, info)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ integer(psb_ipk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync()
+ call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ end subroutine i_oacc_ins_a
+
+ subroutine i_oacc_ins_v(n, irl, val, dupl, x, info)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_i_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ class is (psb_i_vect_oacc)
+ select type(vval => val)
+ class is (psb_i_vect_oacc)
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
+ ! Add the OpenACC kernel call here if needed
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ if (irl%is_dev()) call irl%sync()
+ if (val%is_dev()) call val%sync()
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info,'i_oacc_ins_v')
+ return
+ end if
+ end subroutine i_oacc_ins_v
+
+ subroutine i_oacc_bld_x(x, this)
+ use psb_error_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: this(:)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'i_oacc_bld_x', i_err = (/size(this), izero, izero, izero, izero/))
+ end if
+ x%v(:) = this(:)
+ call x%set_host()
+ call x%sync()
+ end subroutine i_oacc_bld_x
+
+ subroutine i_oacc_bld_mn(x, n)
+ use psb_error_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'i_oacc_bld_mn', i_err = (/n, n, n, n, n/))
+ end if
+ end subroutine i_oacc_bld_mn
+
+ subroutine i_oacc_sync(x)
+ use psb_error_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: n, info
+
+ info = 0
+ if (x%is_host()) then
+ n = size(x%v)
+ if (.not.allocated(x%v)) then
+ write(*, *) 'Incoherent situation : x%v not allocated'
+ call psb_realloc(n, x%v, info)
+ end if
+ if ((n > size(x%v)) .or. (n > x%get_nrows())) then
+ write(*, *) 'Incoherent situation : sizes', n, size(x%v), x%get_nrows()
+ call psb_realloc(n, x%v, info)
+ end if
+ !$acc update device(x%v)
+ else if (x%is_dev()) then
+ n = size(x%v)
+ if (.not.allocated(x%v)) then
+ write(*, *) 'Incoherent situation : x%v not allocated'
+ call psb_realloc(n, x%v, info)
+ end if
+ !$acc update self(x%v)
+ end if
+ if (info == 0) call x%set_sync()
+ if (info /= 0) then
+ info = psb_err_internal_error_
+ call psb_errpush(info, 'i_oacc_sync')
+ end if
+ end subroutine i_oacc_sync
+
+ subroutine i_oacc_sync_space(x, info)
+ use psb_error_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nh, nd
+
+ info = 0
+ if (x%is_dev()) then
+ nh = size(x%v)
+ nd = nh
+ if (nh < nd) then
+ call psb_realloc(nd, x%v, info)
+ end if
+ else
+ nh = size(x%v)
+ nd = nh
+ if (nh < nd) then
+ call psb_realloc(nd, x%v, info)
+ end if
+ end if
+ end subroutine i_oacc_sync_space
+
+ function i_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ res = 0
+ if (allocated(x%v)) res = size(x%v)
+ end function i_oacc_get_nrows
+
+ function i_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'iOACC'
+ end function i_oacc_get_fmt
+
+ subroutine i_oacc_all(n, x, info)
+ use psb_error_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) call x%set_host()
+ if (info == 0) call x%sync_space(info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'i_oacc_all', i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine i_oacc_all
+
+ subroutine i_oacc_zero(x)
+ use psb_error_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ ! Ensure zeroing on the GPU side
+ call x%set_dev()
+ x%v = 0
+ !$acc update device(x%v)
+ end subroutine i_oacc_zero
+
+ subroutine i_oacc_asb_m(n, x, info)
+ use psb_error_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nh, nd
+
+ info = 0
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_i_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space(info)
+ call x%set_host()
+ end if
+ else
+ nh = size(x%v)
+ if (nh < n) then
+ call x%psb_i_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space(info)
+ call x%set_host()
+ end if
+ end if
+ end subroutine i_oacc_asb_m
+
+ subroutine i_oacc_vect_finalize(x)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ type(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ call x%free(info)
+ end subroutine i_oacc_vect_finalize
+
+end module psb_i_oacc_vect_mod
+
+
+
+
+
+
\ No newline at end of file
diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90
new file mode 100644
index 00000000..ce5e85f9
--- /dev/null
+++ b/openacc/psb_oacc_mod.F90
@@ -0,0 +1,7 @@
+module psb_oacc_mod
+ use psb_const_mod
+
+ use psb_d_oacc_vect_mod
+ use psb_d_oacc_csr_mat_mod
+
+end module psb_oacc_mod
\ No newline at end of file
diff --git a/test/openacc/Makefile b/test/openacc/Makefile
new file mode 100644
index 00000000..6df14d42
--- /dev/null
+++ b/test/openacc/Makefile
@@ -0,0 +1,53 @@
+TOPDIR=../..
+include $(TOPDIR)/Make.inc
+
+# Directories
+LIBDIR=$(TOPDIR)/lib/
+PSBLIBDIR=$(TOPDIR)/lib/
+PSBINCDIR=$(TOPDIR)/include
+PSBMODDIR=$(TOPDIR)/modules
+INCDIR=$(TOPDIR)/include
+MODDIR=$(TOPDIR)/modules
+EXEDIR=./runs
+
+# Libraries
+PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lpsb_openacc -lopenblas -lmetis
+LDLIBS=$(PSBGPULDLIBS)
+
+# Includes
+FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
+
+# Compiler flags
+FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70"
+CFLAGS=-O0 -march=native
+
+# Source files
+SRCS=test.F90 vectoacc.F90
+CSRC=timers.c
+
+# Object files
+OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o)
+
+# Default rule
+all: dir
+
+dir:
+ @if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi
+
+# Pattern rule for creating executables
+%: %.o timers.o
+ $(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
+ /bin/mv $@ $(EXEDIR)
+
+# Compilation rules
+%.o: %.F90
+ $(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@
+
+%.o: %.c
+ $(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@
+
+clean:
+ /bin/rm -fr *.o *.mod $(EXEDIR)/*
+
+# Phony targets
+.PHONY: all dir clean
diff --git a/test/openacc/test.F90 b/test/openacc/test.F90
new file mode 100644
index 00000000..0d0b756f
--- /dev/null
+++ b/test/openacc/test.F90
@@ -0,0 +1,617 @@
+module psb_d_pde3d_mod
+
+
+ use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
+ & psb_dspmat_type, psb_d_vect_type, dzero,&
+ & psb_d_base_sparse_mat, psb_d_base_vect_type, &
+ & psb_i_base_vect_type, psb_l_base_vect_type
+
+ interface
+ function d_func_3d(x,y,z) result(val)
+ import :: psb_dpk_
+ real(psb_dpk_), intent(in) :: x,y,z
+ real(psb_dpk_) :: val
+ end function d_func_3d
+ end interface
+
+ interface psb_gen_pde3d
+ module procedure psb_d_gen_pde3d
+ end interface psb_gen_pde3d
+
+ contains
+
+ function d_null_func_3d(x,y,z) result(val)
+
+ real(psb_dpk_), intent(in) :: x,y,z
+ real(psb_dpk_) :: val
+
+ val = dzero
+
+ end function d_null_func_3d
+ !
+ ! functions parametrizing the differential equation
+ !
+
+ !
+ ! Note: b1, b2 and b3 are the coefficients of the first
+ ! derivative of the unknown function. The default
+ ! we apply here is to have them zero, so that the resulting
+ ! matrix is symmetric/hermitian and suitable for
+ ! testing with CG and FCG.
+ ! When testing methods for non-hermitian matrices you can
+ ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done))
+ !
+ function b1(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: b1
+ real(psb_dpk_), intent(in) :: x,y,z
+ b1=done/sqrt((3*done))
+ end function b1
+ function b2(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: b2
+ real(psb_dpk_), intent(in) :: x,y,z
+ b2=done/sqrt((3*done))
+ end function b2
+ function b3(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: b3
+ real(psb_dpk_), intent(in) :: x,y,z
+ b3=done/sqrt((3*done))
+ end function b3
+ function c(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: c
+ real(psb_dpk_), intent(in) :: x,y,z
+ c=dzero
+ end function c
+ function a1(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: a1
+ real(psb_dpk_), intent(in) :: x,y,z
+ a1=done/80
+ end function a1
+ function a2(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: a2
+ real(psb_dpk_), intent(in) :: x,y,z
+ a2=done/80
+ end function a2
+ function a3(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: a3
+ real(psb_dpk_), intent(in) :: x,y,z
+ a3=done/80
+ end function a3
+ function g(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: g
+ real(psb_dpk_), intent(in) :: x,y,z
+ g = dzero
+ if (x == done) then
+ g = done
+ else if (x == dzero) then
+ g = exp(y**2-z**2)
+ end if
+ end function g
+
+
+ !
+ ! subroutine to allocate and fill in the coefficient matrix and
+ ! the rhs.
+ !
+ subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
+ & f,amold,vmold,imold,partition,nrl,iv,tnd)
+ use psb_base_mod
+ use psb_util_mod
+ !
+ ! Discretizes the partial differential equation
+ !
+ ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u)
+ ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f
+ ! dxdx dydy dzdz dx dy dz
+ !
+ ! with Dirichlet boundary conditions
+ ! u = g
+ !
+ ! on the unit cube 0<=x,y,z<=1.
+ !
+ !
+ ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+ !
+ implicit none
+ integer(psb_ipk_) :: idim
+ type(psb_dspmat_type) :: a
+ type(psb_d_vect_type) :: xv,bv
+ type(psb_desc_type) :: desc_a
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: info
+ character(len=*) :: afmt
+ procedure(d_func_3d), optional :: f
+ class(psb_d_base_sparse_mat), optional :: amold
+ class(psb_d_base_vect_type), optional :: vmold
+ class(psb_i_base_vect_type), optional :: imold
+ integer(psb_ipk_), optional :: partition, nrl,iv(:)
+ logical, optional :: tnd
+ ! Local variables.
+
+ integer(psb_ipk_), parameter :: nb=20
+ type(psb_d_csc_sparse_mat) :: acsc
+ type(psb_d_coo_sparse_mat) :: acoo
+ type(psb_d_csr_sparse_mat) :: acsr
+ real(psb_dpk_) :: zt(nb),x,y,z
+ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
+ integer(psb_lpk_) :: m,n,glob_row,nt
+ integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
+ ! For 3D partition
+ ! Note: integer control variables going directly into an MPI call
+ ! must be 4 bytes, i.e. psb_mpk_
+ integer(psb_mpk_) :: npdims(3), npp, minfo
+ integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
+ integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+ ! Process grid
+ integer(psb_ipk_) :: np, iam
+ integer(psb_ipk_) :: icoeff
+ integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
+ real(psb_dpk_), allocatable :: val(:)
+ ! deltah dimension of each grid cell
+ ! deltat discretization time
+ real(psb_dpk_) :: deltah, sqdeltah, deltah2
+ real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero
+ real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb
+ integer(psb_ipk_) :: err_act
+ procedure(d_func_3d), pointer :: f_
+ logical :: tnd_
+ character(len=20) :: name, ch_err,tmpfmt
+
+ info = psb_success_
+ name = 'create_matrix'
+ call psb_erractionsave(err_act)
+
+ call psb_info(ctxt, iam, np)
+
+
+ if (present(f)) then
+ f_ => f
+ else
+ f_ => d_null_func_3d
+ end if
+
+ deltah = done/(idim+2)
+ sqdeltah = deltah*deltah
+ deltah2 = (2*done)* deltah
+
+ if (present(partition)) then
+ if ((1<= partition).and.(partition <= 3)) then
+ partition_ = partition
+ else
+ write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+ partition_ = 3
+ end if
+ else
+ partition_ = 3
+ end if
+
+ ! initialize array descriptor and sparse matrix storage. provide an
+ ! estimate of the number of non zeroes
+
+ m = (1_psb_lpk_*idim)*idim*idim
+ n = m
+ nnz = ((n*7)/(np))
+ if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
+ t0 = psb_wtime()
+ select case(partition_)
+ case(1)
+ ! A BLOCK partition
+ if (present(nrl)) then
+ nr = nrl
+ else
+ !
+ ! Using a simple BLOCK distribution.
+ !
+ nt = (m+np-1)/np
+ nr = max(0,min(nt,m-(iam*nt)))
+ end if
+
+ nt = nr
+ call psb_sum(ctxt,nt)
+ if (nt /= m) then
+ write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end if
+
+ !
+ ! First example of use of CDALL: specify for each process a number of
+ ! contiguous rows
+ !
+ call psb_cdall(ctxt,desc_a,info,nl=nr)
+ myidx = desc_a%get_global_indices()
+ nlr = size(myidx)
+
+ case(2)
+ ! A partition defined by the user through IV
+
+ if (present(iv)) then
+ if (size(iv) /= m) then
+ write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end if
+ else
+ write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end if
+
+ !
+ ! Second example of use of CDALL: specify for each row the
+ ! process that owns it
+ !
+ call psb_cdall(ctxt,desc_a,info,vg=iv)
+ myidx = desc_a%get_global_indices()
+ nlr = size(myidx)
+
+ case(3)
+ ! A 3-dimensional partition
+
+ ! A nifty MPI function will split the process list
+ npdims = 0
+ call mpi_dims_create(np,3,npdims,info)
+ npx = npdims(1)
+ npy = npdims(2)
+ npz = npdims(3)
+
+ allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+ ! We can reuse idx2ijk for process indices as well.
+ call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+ ! Now let's split the 3D cube in hexahedra
+ call dist1Didx(bndx,idim,npx)
+ mynx = bndx(iamx+1)-bndx(iamx)
+ call dist1Didx(bndy,idim,npy)
+ myny = bndy(iamy+1)-bndy(iamy)
+ call dist1Didx(bndz,idim,npz)
+ mynz = bndz(iamz+1)-bndz(iamz)
+
+ ! How many indices do I own?
+ nlr = mynx*myny*mynz
+ allocate(myidx(nlr))
+ ! Now, let's generate the list of indices I own
+ nr = 0
+ do i=bndx(iamx),bndx(iamx+1)-1
+ do j=bndy(iamy),bndy(iamy+1)-1
+ do k=bndz(iamz),bndz(iamz+1)-1
+ nr = nr + 1
+ call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+ end do
+ end do
+ end do
+ if (nr /= nlr) then
+ write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+ & nr,nlr,mynx,myny,mynz
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ end if
+
+ !
+ ! Third example of use of CDALL: specify for each process
+ ! the set of global indices it owns.
+ !
+ call psb_cdall(ctxt,desc_a,info,vl=myidx)
+
+ case default
+ write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end select
+
+
+ if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,&
+ & dupl=psb_dupl_err_)
+ ! define rhs from boundary conditions; also build initial guess
+ if (info == psb_success_) call psb_geall(xv,desc_a,info)
+ if (info == psb_success_) call psb_geall(bv,desc_a,info)
+
+ call psb_barrier(ctxt)
+ talc = psb_wtime()-t0
+
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='allocation rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+
+ ! we build an auxiliary matrix consisting of one row at a
+ ! time; just a small matrix. might be extended to generate
+ ! a bunch of rows per call.
+ !
+ allocate(val(20*nb),irow(20*nb),&
+ &icol(20*nb),stat=info)
+ if (info /= psb_success_ ) then
+ info=psb_err_alloc_dealloc_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+
+
+ ! loop over rows belonging to current process in a block
+ ! distribution.
+
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ do ii=1, nlr,nb
+ ib = min(nb,nlr-ii+1)
+ icoeff = 1
+ do k=1,ib
+ i=ii+k-1
+ ! local matrix pointer
+ glob_row=myidx(i)
+ ! compute gridpoint coordinates
+ call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+ ! x, y, z coordinates
+ x = (ix-1)*deltah
+ y = (iy-1)*deltah
+ z = (iz-1)*deltah
+ zt(k) = f_(x,y,z)
+ ! internal point: build discretization
+ !
+ ! term depending on (x-1,y,z)
+ !
+ val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
+ if (ix == 1) then
+ zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x,y-1,z)
+ val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
+ if (iy == 1) then
+ zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x,y,z-1)
+ val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
+ if (iz == 1) then
+ zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+
+ ! term depending on (x,y,z)
+ val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
+ & + c(x,y,z)
+ call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ ! term depending on (x,y,z+1)
+ val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
+ if (iz == idim) then
+ zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x,y+1,z)
+ val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
+ if (iy == idim) then
+ zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x+1,y,z)
+ val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
+ if (ix==idim) then
+ zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+
+ end do
+ call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
+ if(info /= psb_success_) exit
+ call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
+ if(info /= psb_success_) exit
+ zt(:)=dzero
+ call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
+ if(info /= psb_success_) exit
+ end do
+
+ tgen = psb_wtime()-t1
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='insert rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+
+ deallocate(val,irow,icol)
+
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ call psb_cdasb(desc_a,info,mold=imold)
+ tcdasb = psb_wtime()-t1
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ if (info == psb_success_) then
+ if (present(amold)) then
+ call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd)
+ else
+ call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd)
+ end if
+ end if
+ call psb_barrier(ctxt)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='asb rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+ if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
+ if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='asb rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+ tasb = psb_wtime()-t1
+ call psb_barrier(ctxt)
+ ttot = psb_wtime() - t0
+
+ call psb_amx(ctxt,talc)
+ call psb_amx(ctxt,tgen)
+ call psb_amx(ctxt,tasb)
+ call psb_amx(ctxt,ttot)
+ if(iam == psb_root_) then
+ tmpfmt = a%get_fmt()
+ write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
+ & tmpfmt
+ write(psb_out_unit,'("-allocation time : ",es12.5)') talc
+ write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
+ write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
+ write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
+ write(psb_out_unit,'("-total time : ",es12.5)') ttot
+
+ end if
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(ctxt,err_act)
+
+ return
+ end subroutine psb_d_gen_pde3d
+
+
+end module psb_d_pde3d_mod
+
+
+
+program test
+ use psb_base_mod
+ use psb_ext_mod
+ use psb_oacc_mod
+ use psb_d_pde3d_mod
+
+ implicit none
+ integer(psb_ipk_) :: n, i, info, m, nrm, nz
+ integer(psb_ipk_), parameter :: ntests=80, ngpu=20
+ real(psb_dpk_) :: dot_dev, dot_host
+ type(psb_d_vect_oacc) :: tx, ty
+ type(psb_d_oacc_csr_sparse_mat) :: aacsr
+ real(psb_dpk_) :: t0, t1, t2, t3, csflp, elflp
+ double precision, external :: etime
+
+ type(psb_dspmat_type) :: a
+ type(psb_desc_type) :: desc_a
+ type(psb_d_vect_type) :: xxv, bv
+ type(psb_d_csr_sparse_mat) :: acsr
+ character(len=5) :: afmt='csr'
+ real(psb_dpk_), allocatable :: vv(:), ydev(:), yhost(:)
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: iam, np, nth, idim
+ integer(psb_epk_) :: neq
+
+ call psb_init(ctxt)
+ call psb_info(ctxt, iam, np)
+
+ write(*,*) 'Enter size :'
+ read(*,*) idim
+ idim = max(1, idim)
+
+ n = idim**3
+ call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info)
+ call a%cp_to(acsr)
+ m = acsr%get_nrows()
+ n = acsr%get_ncols()
+ nz = acsr%get_nzeros()
+ call aacsr%all(m, n, nz, info)
+ aacsr%val = (acsr%val)
+ aacsr%ja = (acsr%ja)
+ aacsr%irp = (acsr%irp)
+ call aacsr%set_host()
+ call aacsr%sync()
+
+ call initialize(n)
+
+ call to_host()
+ t2 = etime()
+ do i = 1, ntests
+ dot_host = h_dot(n)
+ end do
+ t3 = etime()
+
+ call tx%all(n, info)
+ call ty%all(n, info)
+ vv = bv%get_vect()
+ call bv%set_vect(v1)
+ call tx%set_vect(v1)
+ call ty%set_vect(v2)
+ t0 = etime()
+ do i = 1, ntests * ngpu
+ dot_dev = tx%dot_v(n, ty)
+ end do
+ !$acc wait
+ t1 = etime()
+ write(*,*) ' Dot Results : dev:', dot_dev, ' host:', dot_host
+ write(*,*) ' Timing : dev:', t1 - t0, (t1 - t0) / (ntests * ngpu), &
+ ' host:', t3 - t2, (t3 - t2) / ntests
+
+ call a%mv_from(acsr)
+ t2 = etime()
+ do i = 1, ntests
+ call a%spmm(done, bv, dzero, xxv, info)
+ end do
+ t3 = etime()
+ yhost = xxv%get_vect()
+ t0 = etime()
+ do i = 1, ntests * ngpu
+ call aacsr%vect_mv(done, tx, dzero, ty, info)
+ end do
+ !$acc wait
+ t1 = etime()
+ ydev = ty%get_vect()
+ write(*,*) 'Correctness check: ', maxval(abs(ydev(:) - yhost(:)))
+ write(*,*) ' CSR PROD '
+ write(*, '(2(a,f12.3,2x))') ' Timing (ms): '
+ csflp = 2.d0 * nz / ((t1 - t0) / (ntests * ngpu))
+ write(*, '(2(a,f12.3,2x))') ' dev:', 1e3 * (t1 - t0) / (ntests * ngpu), ' :', csflp / 1.d6
+ csflp = 2.d0 * nz / ((t3 - t2) / (ntests))
+ write(*, '(2(a,f12.3,2x))') ' host:', 1e3 * (t3 - t2) / ntests, ' :', csflp / 1.d6
+ write(*,*) 'Done'
+
+ call tx%free(info)
+ call ty%free(info)
+ call finalize_dev()
+ call finalize_host()
+ call psb_exit(ctxt)
+end program test
diff --git a/test/openacc/timers.c b/test/openacc/timers.c
new file mode 100644
index 00000000..12fa4f56
--- /dev/null
+++ b/test/openacc/timers.c
@@ -0,0 +1,97 @@
+#include
+#include
+#include
+
+double wtime()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
+ return(temp);
+}
+
+double timef_()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
+ return(temp);
+}
+
+double timef()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec)*1.0e3 + ((double)tt.tv_usec)*1.0e-3;
+ return(temp);
+}
+
+double etime()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
+ return(temp);
+}
+
+double etime_()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
+ return(temp);
+}
+
+double etimef()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
+ return(temp);
+}
+
+double etimef_()
+{
+ struct timeval tt;
+ struct timezone tz;
+ double temp;
+ if (gettimeofday(&tt,&tz) != 0) {
+ fprintf(stderr,"Fatal error for gettimeofday ??? \n");
+ exit(-1);
+ }
+ temp = ((double)tt.tv_sec) + ((double)tt.tv_usec)*1.0e-6;
+ return(temp);
+}
+
+
+
diff --git a/test/openacc/vectoacc.F90 b/test/openacc/vectoacc.F90
new file mode 100644
index 00000000..b50fa317
--- /dev/null
+++ b/test/openacc/vectoacc.F90
@@ -0,0 +1,85 @@
+program vectoacc
+ use psb_base_mod
+ use psb_oacc_mod
+ implicit none
+
+ type(psb_d_vect_oacc) :: v3, v4, v5
+ integer(psb_ipk_) :: info, n, i
+ real(psb_dpk_) :: alpha, beta, result
+ double precision, external :: etime
+
+ real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev
+ double precision :: time_start, time_end
+ integer(psb_ipk_), parameter :: ntests=80, ngpu=20
+
+ write(*, *) 'Test of the vector operations with OpenACC'
+
+ write(*, *) 'Enter the size of the vectors'
+ read(*, *) n
+ alpha = 2.0
+ beta = 0.5
+
+ call v3%all(n, info)
+ call v4%all(n, info)
+ call v5%all(n, info)
+
+ do i = 1, n
+ v3%v(i) = real(i, psb_dpk_)
+ v4%v(i) = real(n - i, psb_dpk_)
+ end do
+
+ call v3%set_dev()
+ call v4%set_dev()
+
+ call v3%scal(alpha)
+ call v3%sync()
+
+ do i = 1, n
+ if (v3%v(i) /= alpha * real(i, psb_dpk_)) then
+ write(*, *) 'Scal error : index', i
+ end if
+ end do
+ write(*, *) 'Scal test passed'
+
+ result = v3%dot_v(n, v4)
+ call v3%sync()
+ call v4%sync()
+ if (result /= sum(v3%v * v4%v)) then
+ write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result
+ end if
+ write(*, *) 'Dot_v test passed'
+
+ result = v3%nrm2(n)
+ call v3%sync()
+ if (result /= sqrt(sum(v3%v ** 2))) then
+ write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result
+ end if
+ write(*, *) 'nrm2 test passed'
+
+ call v3%set_host()
+ call v4%set_host()
+
+ time_start = etime()
+ do i = 1, ntests
+ dot_host = sum(v3%v * v4%v)
+ end do
+ time_end = etime()
+ t_host = (time_end - time_start) / real(ntests)
+ write(*, *) 'Performance host: ', t_host, ' sec'
+
+ call v3%set_dev()
+ call v4%set_dev()
+ time_start = etime()
+ do i = 1, ntests
+ dot_dev = v3%dot_v(n, v4)
+ end do
+ !$acc wait
+ time_end = etime()
+ t_dev = (time_end - time_start) / real(ntests)
+ write(*, *) 'Performance device: ', t_dev, ' sec'
+
+ call v3%free(info)
+ call v4%free(info)
+ call v5%free(info)
+
+end program vectoacc
From e3a3e39caf31e8c28b0bf4267a06b6d4e9963d93 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 9 Jul 2024 10:03:30 +0200
Subject: [PATCH 02/86] Modify configure --enable-openacc
--with-fcopenacc=.....
---
Make.inc.in | 7 +++
Makefile | 11 ++++-
config/pac.m4 | 44 ++++++++++++++++++
configure | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
configure.ac | 21 +++++++++
5 files changed, 204 insertions(+), 2 deletions(-)
diff --git a/Make.inc.in b/Make.inc.in
index 858ad336..38c8ef86 100755
--- a/Make.inc.in
+++ b/Make.inc.in
@@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a
+OACCD=@OACCD@
+OACCLD=@OACCLD@
+FCOPENACC=@FCOPENACC@
+CCOPENACC=@CCOPENACC@
+CXXOPENACC=@CXXOPENACC@
+
CUDAD=@CUDAD@
CUDALD=@CUDALD@
LCUDA=@LCUDA@
@@ -82,6 +88,7 @@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
+
.SUFFIXES: .cu
.cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<
diff --git a/Makefile b/Makefile
index 95f4cb17..972fd3c6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
include Make.inc
-all: dirs based precd kryld utild cbindd extd $(CUDAD) libd
+all: dirs based precd kryld utild cbindd extd $(CUDAD) $(OACCD) libd
@echo "====================================="
@echo "PSBLAS libraries Compilation Successful."
@@ -14,9 +14,10 @@ utild: based
kryld: precd
extd: based
cudad: extd
+oaccd: extd
cbindd: based precd kryld utild
-libd: based precd kryld utild cbindd extd $(CUDALD)
+libd: based precd kryld utild cbindd extd $(CUDALD) $(OACCLD)
$(MAKE) -C base lib
$(MAKE) -C prec lib
$(MAKE) -C krylov lib
@@ -25,6 +26,8 @@ libd: based precd kryld utild cbindd extd $(CUDALD)
$(MAKE) -C ext lib
cudald: cudad
$(MAKE) -C cuda lib
+oaccld: oaccd
+ $(MAKE) -C openacc lib
based:
@@ -41,6 +44,8 @@ extd: based
$(MAKE) -C ext objs
cudad: based extd
$(MAKE) -C cuda objs
+oaccd: based extd
+ $(MAKE) -C openacc objs
install: all
@@ -67,6 +72,7 @@ clean:
$(MAKE) -C cbind clean
$(MAKE) -C ext clean
$(MAKE) -C cuda clean
+ $(MAKE) -C openacc clean
check: all
make check -C test/serial
@@ -84,6 +90,7 @@ veryclean: cleanlib
cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean
+ cd openacc && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean
diff --git a/config/pac.m4 b/config/pac.m4
index 0d22392f..d74a6f24 100644
--- a/config/pac.m4
+++ b/config/pac.m4
@@ -2267,3 +2267,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
+
+dnl @synopsis PAC_ARG_OPENACC
+dnl
+dnl Test for --enable-openacc
+dnl
+dnl
+dnl
+dnl Example use:
+dnl
+dnl
+dnl @author Salvatore Filippone
+dnl
+AC_DEFUN([PAC_ARG_OPENACC],
+[AC_MSG_CHECKING([whether we want openacc ])
+AC_ARG_ENABLE(openacc,
+AS_HELP_STRING([--enable-openacc],
+[Specify whether to enable openacc. ]),
+[
+pac_cv_openacc="yes";
+]
+dnl ,
+dnl [pac_cv_openacc="no";]
+ )
+if test x"$pac_cv_openacc" == x"yes" ; then
+ AC_MSG_RESULT([yes.])
+# AC_LANG_PUSH([Fortran])
+# AC_OPENACC()
+# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
+# AC_LANG_POP()
+# AC_LANG_PUSH([C])
+# AC_OPENACC()
+# pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
+# AC_LANG_POP()
+# AC_LANG_PUSH([C++])
+# AC_OPENACC()
+# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
+# AC_LANG_POP()
+else
+ pac_cv_openacc="no";
+ AC_MSG_RESULT([no.])
+fi
+]
+)
+
diff --git a/configure b/configure
index b7e5f329..15e3670d 100755
--- a/configure
+++ b/configure
@@ -667,6 +667,11 @@ CUDA_DIR
EXTRALDLIBS
SPGPU_LIBS
SPGPU_FLAGS
+CXXOPENACC
+CCOPENACC
+FCOPENACC
+OACCLD
+OACCD
METISINCFILE
UTILLIBNAME
METHDLIBNAME
@@ -840,6 +845,10 @@ with_amdincdir
with_amdlibdir
with_cuda
with_cudacc
+enable_openacc
+with_ccopenacc
+with_cxxopenacc
+with_fcopenacc
'
ac_precious_vars='build_alias
host_alias
@@ -1490,6 +1499,7 @@ Optional Features:
--disable-silent-rules verbose build output (undo: "make V=0")
--enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP
+ --enable-openacc Specify whether to enable openacc.
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -1535,6 +1545,12 @@ Optional Packages:
--with-cuda=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75
+ --with-ccopenacc additional [CCOPENACC] flags to be added: will
+ prepend to [CCOPENACC]
+ --with-cxxopenacc additional [CXXOPENACC] flags to be added: will
+ prepend to [CXXOPENACC]
+ --with-fcopenacc additional [FCOPENACC] flags to be added: will
+ prepend to [FCOPENACC]
Some influential environment variables:
FC Fortran compiler command
@@ -10867,6 +10883,106 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDA_LIBS="";
fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
+printf %s "checking whether we want openacc ... " >&6; }
+# Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+ enableval=$enable_openacc;
+pac_cv_openacc="yes";
+
+
+fi
+
+if test x"$pac_cv_openacc" == x"yes" ; then
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
+printf "%s\n" "yes." >&6; }
+# AC_LANG_PUSH([Fortran])
+# AC_OPENACC()
+# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
+# AC_LANG_POP()
+# AC_LANG_PUSH([C])
+# AC_OPENACC()
+# pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
+# AC_LANG_POP()
+# AC_LANG_PUSH([C++])
+# AC_OPENACC()
+# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
+# AC_LANG_POP()
+else
+ pac_cv_openacc="no";
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
+printf "%s\n" "no." >&6; }
+fi
+
+
+if test x"$pac_cv_openacc" == x"yes" ; then
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-ccopenacc was given.
+if test ${with_ccopenacc+y}
+then :
+ withval=$with_ccopenacc;
+CCOPENACC="${withval} ${CCOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
+printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
+
+else $as_nop
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-cxxopenacc was given.
+if test ${with_cxxopenacc+y}
+then :
+ withval=$with_cxxopenacc;
+CXXOPENACC="${withval} ${CXXOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
+printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
+
+else $as_nop
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-fcopenacc was given.
+if test ${with_fcopenacc+y}
+then :
+ withval=$with_fcopenacc;
+FCOPENACC="${withval} ${FCOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
+printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
+
+else $as_nop
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+
+ OACCD=oaccd;
+ OACCLD=oaccld;
+
+ #FCOPT="$FCOPT $FCOPENACC";
+ #CCOPT="$CCOPT $CCOPENACC"
+ #CXXOPT="$CXXOPT $CXXOPENACC"
+ #FLINK="$FLINK $FCOPENACC";
+fi
@@ -10957,6 +11073,13 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+
+
+
+
+
+
+
diff --git a/configure.ac b/configure.ac
index 10f9e2f4..ed4f4acc 100755
--- a/configure.ac
+++ b/configure.ac
@@ -843,6 +843,20 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDA_LIBS="";
fi
+PAC_ARG_OPENACC()
+if test x"$pac_cv_openacc" == x"yes" ; then
+ PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
+ PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
+ PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
+
+ OACCD=oaccd;
+ OACCLD=oaccld;
+
+ #FCOPT="$FCOPT $FCOPENACC";
+ #CCOPT="$CCOPT $CCOPENACC"
+ #CXXOPT="$CXXOPT $CXXOPENACC"
+ #FLINK="$FLINK $FCOPENACC";
+fi
@@ -927,6 +941,12 @@ AC_SUBST(PRECLIBNAME)
AC_SUBST(METHDLIBNAME)
AC_SUBST(UTILLIBNAME)
AC_SUBST(METISINCFILE)
+AC_SUBST(OACCD)
+AC_SUBST(OACCLD)
+AC_SUBST(FCOPENACC)
+AC_SUBST(CCOPENACC)
+AC_SUBST(CXXOPENACC)
+
AC_SUBST(SPGPU_FLAGS)
AC_SUBST(SPGPU_LIBS)
dnl AC_SUBST(SPGPU_DIR)
@@ -944,6 +964,7 @@ AC_SUBST(CUDEFINES)
AC_SUBST(CUDAD)
AC_SUBST(CUDALD)
AC_SUBST(LCUDA)
+
###############################################################################
# the following files will be created by Automake
From 93c9df0277619a4a3d8d56d4acfd99cb717a79a4 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 9 Jul 2024 10:03:57 +0200
Subject: [PATCH 03/86] Adjust Makefiles & source
---
openacc/Makefile | 61 +-
openacc/psb_d_oacc_csr_mat_mod.F90 | 2 +-
openacc/psb_d_oacc_vect_mod.F90 | 1702 ++++++++++++++--------------
3 files changed, 888 insertions(+), 877 deletions(-)
diff --git a/openacc/Makefile b/openacc/Makefile
index 3590249c..3327610f 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -1,11 +1,10 @@
-.SUFFIXES:
-.SUFFIXES: .F90 .f90 .o .s .c
+include ../Make.inc
# Compilers and flags
-CC=mpicc
-FC=mpif90
-FCOPT=-O0 -march=native
-OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
+#CC=mpicc
+#FC=mpif90
+#FCOPT=-O0 -march=native
+#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
# Directories
LIBDIR=../lib
@@ -14,35 +13,49 @@ MODDIR=../modules
IMPLDIR=./impl # Adding the impl directory
# Include and library paths
-INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR)
-LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
+INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR)
+FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
+CINCLUDES=
+#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
# Source files
FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
- psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
- impl/psb_d_oacc_csr_vect_mv.o
+ psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o
# Library name
LIBNAME=libpsb_openacc.a
-# Rules
-all: $(LIBNAME)
+OBJS=$(COBJS) $(FOBJS)
-$(LIBNAME): $(FOBJS)
- ar cr $(LIBNAME) $(FOBJS)
+# Rules
+lib: objs ilib
+ ar cur $(LIBNAME) $(OBJS)
/bin/cp -p $(LIBNAME) $(LIBDIR)
-clean:
- /bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o
+objs: $(OBJS) iobjs
+ /bin/cp -p *$(.mod) $(MODDIR)
-.f90.o:
- $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
+iobjs: $(OBJS)
+ $(MAKE) -C impl objs
-.c.o:
- $(CC) -c $< -o $@
+ilib: $(OBJS)
+ $(MAKE) -C impl lib
-.F90.o:
- $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
+psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
+ psb_d_oacc_csr_mat_mod.o
+clean: cclean iclean
+ /bin/rm -f $(FOBJS) *$(.mod) *.a
+veryclean: clean
+cclean:
+ /bin/rm -f $(COBJS)
+iclean:
+ $(MAKE) -C impl clean
-.F90.s:
- $(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@
+.c.o:
+ $(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+.f90.o:
+ $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
+.F90.o:
+ $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+.cpp.o:
+ $(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index c6b7e3d0..d9c8a2a6 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -3,7 +3,7 @@ module psb_d_oacc_csr_mat_mod
use iso_c_binding
use psb_d_mat_mod
use psb_d_oacc_vect_mod
- use oaccsparse_mod
+ !use oaccsparse_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index ac5428f3..eda804ce 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -3,9 +3,9 @@ module psb_d_oacc_vect_mod
use psb_const_mod
use psb_error_mod
use psb_d_vect_mod
- use psb_i_oacc_vect_mod
use psb_i_vect_mod
-
+ use psb_i_oacc_vect_mod
+
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
@@ -14,49 +14,49 @@ module psb_d_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => d_oacc_get_nrows
- procedure, nopass :: get_fmt => d_oacc_get_fmt
-
- procedure, pass(x) :: all => d_oacc_vect_all
- procedure, pass(x) :: zero => d_oacc_zero
- procedure, pass(x) :: asb_m => d_oacc_asb_m
+!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows
+!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt
+!!$
+!!$ procedure, pass(x) :: all => d_oacc_vect_all
+!!$ procedure, pass(x) :: zero => d_oacc_zero
+!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m
procedure, pass(x) :: sync => d_oacc_sync
procedure, pass(x) :: sync_space => d_oacc_sync_space
- procedure, pass(x) :: bld_x => d_oacc_bld_x
- procedure, pass(x) :: bld_mn => d_oacc_bld_mn
- procedure, pass(x) :: free => d_oacc_vect_free
- procedure, pass(x) :: ins_a => d_oacc_ins_a
- procedure, pass(x) :: ins_v => d_oacc_ins_v
+!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x
+!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn
+!!$ procedure, pass(x) :: free => d_oacc_vect_free
+!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a
+!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v
procedure, pass(x) :: is_host => d_oacc_is_host
procedure, pass(x) :: is_dev => d_oacc_is_dev
procedure, pass(x) :: is_sync => d_oacc_is_sync
procedure, pass(x) :: set_host => d_oacc_set_host
procedure, pass(x) :: set_dev => d_oacc_set_dev
procedure, pass(x) :: set_sync => d_oacc_set_sync
- procedure, pass(x) :: set_scal => d_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
- procedure, pass(y) :: sctb => d_oacc_sctb
- procedure, pass(y) :: sctb_x => d_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => oacc_get_size
- procedure, pass(x) :: dot_v => d_oacc_vect_dot
- procedure, pass(x) :: dot_a => d_oacc_dot_a
- procedure, pass(y) :: axpby_v => d_oacc_axpby_v
- procedure, pass(y) :: axpby_a => d_oacc_axpby_a
- procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
- procedure, pass(y) :: mlt_v => d_oacc_mlt_v
- procedure, pass(y) :: mlt_a => d_oacc_mlt_a
- procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
- procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
- procedure, pass(x) :: scal => d_oacc_scal
- procedure, pass(x) :: nrm2 => d_oacc_nrm2
- procedure, pass(x) :: amax => d_oacc_amax
- procedure, pass(x) :: asum => d_oacc_asum
+!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal
+!!$
+!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
+!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
+!!$ procedure, pass(y) :: sctb => d_oacc_sctb
+!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x
+!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
+!!$
+!!$ procedure, pass(x) :: get_size => oacc_get_size
+!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot
+!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a
+!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v
+!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a
+!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
+!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v
+!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a
+!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
+!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
+!!$ procedure, pass(x) :: scal => d_oacc_scal
+!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2
+!!$ procedure, pass(x) :: amax => d_oacc_amax
+!!$ procedure, pass(x) :: asum => d_oacc_asum
procedure, pass(x) :: absval1 => d_oacc_absval1
- procedure, pass(x) :: absval2 => d_oacc_absval2
+!!$ procedure, pass(x) :: absval2 => d_oacc_absval2
end type psb_d_vect_oacc
@@ -78,729 +78,727 @@ contains
call x%set_dev()
end subroutine d_oacc_absval1
- subroutine d_oacc_absval2(x, y)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- class(psb_d_base_vect_type), intent(inout) :: y
- integer(psb_ipk_) :: n
- integer(psb_ipk_) :: i
-
- n = min(size(x%v), size(y%v))
- select type (yy => y)
- class is (psb_d_vect_oacc)
- if (x%is_host()) call x%sync()
- if (yy%is_host()) call yy%sync()
- !$acc parallel loop
- do i = 1, n
- yy%v(i) = abs(x%v(i))
- end do
- class default
- if (x%is_dev()) call x%sync()
- if (y%is_dev()) call y%sync()
- call x%psb_d_base_vect_type%absval(y)
- end select
- end subroutine d_oacc_absval2
-
-
-
- subroutine d_oacc_scal(alpha, x)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- real(psb_dpk_), intent(in) :: alpha
- integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- !$acc parallel loop
- do i = 1, size(x%v)
- x%v(i) = alpha * x%v(i)
- end do
- call x%set_dev()
- end subroutine d_oacc_scal
-
- function d_oacc_nrm2(n, x) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- integer(psb_ipk_) :: info
- real(psb_dpk_) :: sum
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + x%v(i) * x%v(i)
- end do
- res = sqrt(sum)
- end function d_oacc_nrm2
-
- function d_oacc_amax(n, x) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- integer(psb_ipk_) :: info
- real(psb_dpk_) :: max_val
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- max_val = -huge(0.0)
- !$acc parallel loop reduction(max:max_val)
- do i = 1, n
- if (x%v(i) > max_val) max_val = x%v(i)
- end do
- res = max_val
- end function d_oacc_amax
-
- function d_oacc_asum(n, x) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- integer(psb_ipk_) :: info
- real(psb_dpk_) :: sum
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))
- end do
- res = sum
- end function d_oacc_asum
-
-
- subroutine d_oacc_mlt_v(x, y, info)
- use psi_serial_mod
- implicit none
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_vect_oacc), intent(inout) :: y
- integer(psb_ipk_), intent(out) :: info
-
- integer(psb_ipk_) :: i, n
-
- info = 0
- n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
- type is (psb_d_base_vect_type)
- if (y%is_dev()) call y%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
- call y%set_host()
- class default
- if (xx%is_dev()) call xx%sync()
- if (y%is_dev()) call y%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
- call y%set_host()
- end select
- end subroutine d_oacc_mlt_v
-
- subroutine d_oacc_mlt_a(x, y, info)
- use psi_serial_mod
- implicit none
- real(psb_dpk_), intent(in) :: x(:)
- class(psb_d_vect_oacc), intent(inout) :: y
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, n
-
- info = 0
- if (y%is_dev()) call y%sync_space()
- !$acc parallel loop
- do i = 1, size(x)
- y%v(i) = y%v(i) * x(i)
- end do
- call y%set_host()
- end subroutine d_oacc_mlt_a
-
- subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
- use psi_serial_mod
- implicit none
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:)
- real(psb_dpk_), intent(in) :: y(:)
- class(psb_d_vect_oacc), intent(inout) :: z
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, n
-
- info = 0
- if (z%is_dev()) call z%sync_space()
- !$acc parallel loop
- do i = 1, size(x)
- z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
- end do
- call z%set_host()
- end subroutine d_oacc_mlt_a_2
-
- subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psi_serial_mod
- use psb_string_mod
- implicit none
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_base_vect_type), intent(inout) :: y
- class(psb_d_vect_oacc), intent(inout) :: z
- integer(psb_ipk_), intent(out) :: info
- character(len=1), intent(in), optional :: conjgx, conjgy
- integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
-
- conjgx_ = .false.
- conjgy_ = .false.
- if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
- if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
-
- n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
-
- info = 0
- select type(xx => x)
- class is (psb_d_vect_oacc)
- select type (yy => y)
- class is (psb_d_vect_oacc)
- if (xx%is_host()) call xx%sync_space()
- if (yy%is_host()) call yy%sync_space()
- if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
- end do
- call z%set_dev()
- class default
- if (xx%is_dev()) call xx%sync_space()
- if (yy%is_dev()) call yy%sync()
- if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
- end do
- call z%set_host()
- end select
- class default
- if (x%is_dev()) call x%sync()
- if (y%is_dev()) call y%sync()
- if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
- end do
- call z%set_host()
- end select
- end subroutine d_oacc_mlt_v_2
-
-
- subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
- use psi_serial_mod
- implicit none
- integer(psb_ipk_), intent(in) :: m
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_vect_oacc), intent(inout) :: y
- real(psb_dpk_), intent(in) :: alpha, beta
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: nx, ny, i
-
- info = psb_success_
-
- select type(xx => x)
- type is (psb_d_vect_oacc)
- if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
- if (xx%is_host()) call xx%sync_space()
- nx = size(xx%v)
- ny = size(y%v)
- if ((nx < m) .or. (ny < m)) then
- info = psb_err_internal_error_
- else
- !$acc parallel loop
- do i = 1, m
- y%v(i) = alpha * xx%v(i) + beta * y%v(i)
- end do
- end if
- call y%set_dev()
- class default
- if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
- call y%axpby(m, alpha, x%v, beta, info)
- end select
- end subroutine d_oacc_axpby_v
-
- subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
- use psi_serial_mod
- implicit none
- integer(psb_ipk_), intent(in) :: m
- real(psb_dpk_), intent(in) :: x(:)
- class(psb_d_vect_oacc), intent(inout) :: y
- real(psb_dpk_), intent(in) :: alpha, beta
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i
-
- if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
- !$acc parallel loop
- do i = 1, m
- y%v(i) = alpha * x(i) + beta * y%v(i)
- end do
- call y%set_host()
- end subroutine d_oacc_axpby_a
-
- subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
- use psi_serial_mod
- implicit none
- integer(psb_ipk_), intent(in) :: m
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_base_vect_type), intent(inout) :: y
- class(psb_d_vect_oacc), intent(inout) :: z
- real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: nx, ny, nz, i
- logical :: gpu_done
-
- info = psb_success_
- gpu_done = .false.
-
- select type(xx => x)
- class is (psb_d_vect_oacc)
- select type(yy => y)
- class is (psb_d_vect_oacc)
- select type(zz => z)
- class is (psb_d_vect_oacc)
- if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
- if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
- if (xx%is_host()) call xx%sync_space()
- nx = size(xx%v)
- ny = size(yy%v)
- nz = size(zz%v)
- if ((nx < m) .or. (ny < m) .or. (nz < m)) then
- info = psb_err_internal_error_
- else
- !$acc parallel loop
- do i = 1, m
- yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
- zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
- end do
- end if
- call yy%set_dev()
- call zz%set_dev()
- gpu_done = .true.
- end select
- end select
- end select
-
- if (.not. gpu_done) then
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
- if (z%is_host()) call z%sync()
- call y%axpby(m, alpha, x, beta, info)
- call z%axpby(m, gamma, y, delta, info)
- end if
- end subroutine d_oacc_abgdxyz
-
-
- subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
- use psb_base_mod
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type) :: idx
- real(psb_dpk_) :: beta
- class(psb_d_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
- if (.not.allocated(y%combuf)) then
- call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
- return
- end if
-
- select type(ii => idx)
- class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
- if (y%is_host()) call y%sync_space()
-
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
- end do
-
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
- end do
- end select
- end subroutine d_oacc_sctb_buf
-
- subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
- use psb_base_mod
- implicit none
- integer(psb_ipk_):: i, n
- class(psb_i_base_vect_type) :: idx
- real(psb_dpk_) :: beta, x(:)
- class(psb_d_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
-
- select type(ii => idx)
- class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
- class default
- call psb_errpush(info, 'd_oacc_sctb_x')
- return
- end select
-
- if (y%is_host()) call y%sync_space()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
- end subroutine d_oacc_sctb_x
-
-
-
- subroutine d_oacc_sctb(n, idx, x, beta, y)
- use psb_base_mod
- implicit none
- integer(psb_ipk_) :: n
- integer(psb_ipk_) :: idx(:)
- real(psb_dpk_) :: beta, x(:)
- class(psb_d_vect_oacc) :: y
- integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- if (n == 0) return
- if (y%is_dev()) call y%sync_space()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
- end do
-
- call y%set_host()
- end subroutine d_oacc_sctb
-
-
- subroutine d_oacc_gthzbuf(i, n, idx, x)
- use psb_base_mod
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type) :: idx
- class(psb_d_vect_oacc) :: x
- integer(psb_ipk_) :: info
-
- info = 0
- if (.not.allocated(x%combuf)) then
- call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
- return
- end if
-
- select type(ii => idx)
- class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
- class default
- call psb_errpush(info, 'd_oacc_gthzbuf')
- return
- end select
-
- if (x%is_host()) call x%sync_space()
-
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
- end subroutine d_oacc_gthzbuf
-
- subroutine d_oacc_gthzv_x(i, n, idx, x, y)
- use psb_base_mod
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type):: idx
- real(psb_dpk_) :: y(:)
- class(psb_d_vect_oacc):: x
- integer(psb_ipk_) :: info
-
- info = 0
-
- select type(ii => idx)
- class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
- class default
- call psb_errpush(info, 'd_oacc_gthzv_x')
- return
- end select
-
- if (x%is_host()) call x%sync_space()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
- end subroutine d_oacc_gthzv_x
-
- subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
- use psi_serial_mod
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n, dupl
- class(psb_i_base_vect_type), intent(inout) :: irl
- class(psb_d_base_vect_type), intent(inout) :: val
- integer(psb_ipk_), intent(out) :: info
-
- integer(psb_ipk_) :: i, isz
- logical :: done_oacc
-
- info = 0
- if (psb_errstatus_fatal()) return
-
- done_oacc = .false.
- select type(virl => irl)
- type is (psb_i_vect_oacc)
- select type(vval => val)
- type is (psb_d_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space(info)
- if (x%is_host()) call x%sync_space()
- !$acc parallel loop
- do i = 1, n
- x%v(virl%v(i)) = vval%v(i)
- end do
- call x%set_dev()
- done_oacc = .true.
- end select
- end select
-
- if (.not.done_oacc) then
- select type(virl => irl)
- type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space(info)
- end select
- select type(vval => val)
- type is (psb_d_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
- end select
- call x%ins(n, irl%v, val%v, dupl, info)
- end if
-
- if (info /= 0) then
- call psb_errpush(info, 'oacc_vect_ins')
- return
- end if
-
- end subroutine d_oacc_ins_v
-
-
-
- subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
- use psi_serial_mod
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n, dupl
- integer(psb_ipk_), intent(in) :: irl(:)
- real(psb_dpk_), intent(in) :: val(:)
- integer(psb_ipk_), intent(out) :: info
-
- integer(psb_ipk_) :: i
-
- info = 0
- if (x%is_dev()) call x%sync_space()
- call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
- call x%set_host()
- !$acc update device(x%v)
-
- end subroutine d_oacc_ins_a
-
-
-
- subroutine d_oacc_bld_mn(x, n)
- use psb_base_mod
- implicit none
- integer(psb_mpk_), intent(in) :: n
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: info
-
- call x%all(n, info)
- if (info /= 0) then
- call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
- end if
- call x%set_host()
- !$acc update device(x%v)
-
- end subroutine d_oacc_bld_mn
-
-
- subroutine d_oacc_bld_x(x, this)
- use psb_base_mod
- implicit none
- real(psb_dpk_), intent(in) :: this(:)
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: info
-
- call psb_realloc(size(this), x%v, info)
- if (info /= 0) then
- info = psb_err_alloc_request_
- call psb_errpush(info, 'd_oacc_bld_x', &
- i_err=(/size(this), izero, izero, izero, izero/))
- return
- end if
-
- x%v(:) = this(:)
- call x%set_host()
- !$acc update device(x%v)
-
- end subroutine d_oacc_bld_x
-
-
- subroutine d_oacc_asb_m(n, x, info)
- use psb_base_mod
- implicit none
- integer(psb_mpk_), intent(in) :: n
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(out) :: info
- integer(psb_mpk_) :: nd
-
- info = psb_success_
-
- if (x%is_dev()) then
- nd = size(x%v)
- if (nd < n) then
- call x%sync()
- call x%psb_d_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
- call x%set_host()
- end if
- else
- if (size(x%v) < n) then
- call x%psb_d_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
- call x%set_host()
- end if
- end if
- end subroutine d_oacc_asb_m
-
-
-
- subroutine d_oacc_set_scal(x, val, first, last)
- class(psb_d_vect_oacc), intent(inout) :: x
- real(psb_dpk_), intent(in) :: val
- integer(psb_ipk_), optional :: first, last
-
- integer(psb_ipk_) :: first_, last_
- first_ = 1
- last_ = x%get_nrows()
- if (present(first)) first_ = max(1, first)
- if (present(last)) last_ = min(last, last_)
-
- !$acc parallel loop
- do i = first_, last_
- x%v(i) = val
- end do
- !$acc end parallel loop
-
- call x%set_dev()
- end subroutine d_oacc_set_scal
-
-
-
- subroutine d_oacc_zero(x)
- use psi_serial_mod
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- call x%set_dev()
- call x%set_scal(dzero)
- end subroutine d_oacc_zero
-
- function d_oacc_get_nrows(x) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(in) :: x
- integer(psb_ipk_) :: res
-
- if (allocated(x%v)) res = size(x%v)
- end function d_oacc_get_nrows
-
- function d_oacc_get_fmt() result(res)
- implicit none
- character(len=5) :: res
- res = "dOACC"
-
- end function d_oacc_get_fmt
-
- function d_oacc_vect_dot(n, x, y) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- class(psb_d_base_vect_type), intent(inout) :: y
- integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- real(psb_dpk_), external :: ddot
- integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- res = dzero
-
- select type(yy => y)
- type is (psb_d_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n, x%v, 1, yy%v, 1)
- type is (psb_d_vect_oacc)
- if (x%is_host()) call x%sync()
- if (yy%is_host()) call yy%sync()
-
- !$acc parallel loop reduction(+:res) present(x%v, yy%v)
- do i = 1, n
- res = res + x%v(i) * yy%v(i)
- end do
- !$acc end parallel loop
-
- class default
- call x%sync()
- res = y%dot(n, x%v)
- end select
-
- end function d_oacc_vect_dot
-
-
-
-
- function d_oacc_dot_a(n, x, y) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- real(psb_dpk_), intent(in) :: y(:)
- integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- real(psb_dpk_), external :: ddot
-
- if (x%is_dev()) call x%sync()
- res = ddot(n, y, 1, x%v, 1)
-
- end function d_oacc_dot_a
-
- ! subroutine d_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_d_vect_oacc), intent(inout) :: x
- ! real(psb_dpk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine d_oacc_set_vect
-
- subroutine d_oacc_to_dev(v)
- implicit none
- real(psb_dpk_) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_to_dev
-
- subroutine d_oacc_to_host(v)
- implicit none
- real(psb_dpk_) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_to_host
-
+!!$ subroutine d_oacc_absval2(x, y)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ class(psb_d_base_vect_type), intent(inout) :: y
+!!$ integer(psb_ipk_) :: n
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ n = min(size(x%v), size(y%v))
+!!$ select type (yy => y)
+!!$ class is (psb_d_vect_oacc)
+!!$ if (x%is_host()) call x%sync()
+!!$ if (yy%is_host()) call yy%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ yy%v(i) = abs(x%v(i))
+!!$ end do
+!!$ class default
+!!$ if (x%is_dev()) call x%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ call x%psb_d_base_vect_type%absval(y)
+!!$ end select
+!!$ end subroutine d_oacc_absval2
+!!$
+!!$ subroutine d_oacc_scal(alpha, x)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ real(psb_dpk_), intent(in) :: alpha
+!!$ integer(psb_ipk_) :: info
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ if (x%is_host()) call x%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, size(x%v)
+!!$ x%v(i) = alpha * x%v(i)
+!!$ end do
+!!$ call x%set_dev()
+!!$ end subroutine d_oacc_scal
+!!$
+!!$ function d_oacc_nrm2(n, x) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(in) :: n
+!!$ real(psb_dpk_) :: res
+!!$ integer(psb_ipk_) :: info
+!!$ real(psb_dpk_) :: sum
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ if (x%is_host()) call x%sync_space()
+!!$ sum = 0.0
+!!$ !$acc parallel loop reduction(+:sum)
+!!$ do i = 1, n
+!!$ sum = sum + x%v(i) * x%v(i)
+!!$ end do
+!!$ res = sqrt(sum)
+!!$ end function d_oacc_nrm2
+!!$
+!!$ function d_oacc_amax(n, x) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(in) :: n
+!!$ real(psb_dpk_) :: res
+!!$ integer(psb_ipk_) :: info
+!!$ real(psb_dpk_) :: max_val
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ if (x%is_host()) call x%sync_space()
+!!$ max_val = -huge(0.0)
+!!$ !$acc parallel loop reduction(max:max_val)
+!!$ do i = 1, n
+!!$ if (x%v(i) > max_val) max_val = x%v(i)
+!!$ end do
+!!$ res = max_val
+!!$ end function d_oacc_amax
+!!$
+!!$ function d_oacc_asum(n, x) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(in) :: n
+!!$ real(psb_dpk_) :: res
+!!$ integer(psb_ipk_) :: info
+!!$ real(psb_dpk_) :: sum
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ if (x%is_host()) call x%sync_space()
+!!$ sum = 0.0
+!!$ !$acc parallel loop reduction(+:sum)
+!!$ do i = 1, n
+!!$ sum = sum + abs(x%v(i))
+!!$ end do
+!!$ res = sum
+!!$ end function d_oacc_asum
+!!$
+!!$
+!!$ subroutine d_oacc_mlt_v(x, y, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ class(psb_d_base_vect_type), intent(inout) :: x
+!!$ class(psb_d_vect_oacc), intent(inout) :: y
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ integer(psb_ipk_) :: i, n
+!!$
+!!$ info = 0
+!!$ n = min(x%get_nrows(), y%get_nrows())
+!!$ select type(xx => x)
+!!$ type is (psb_d_base_vect_type)
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end select
+!!$ end subroutine d_oacc_mlt_v
+!!$
+!!$ subroutine d_oacc_mlt_a(x, y, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ real(psb_dpk_), intent(in) :: x(:)
+!!$ class(psb_d_vect_oacc), intent(inout) :: y
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ integer(psb_ipk_) :: i, n
+!!$
+!!$ info = 0
+!!$ if (y%is_dev()) call y%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, size(x)
+!!$ y%v(i) = y%v(i) * x(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end subroutine d_oacc_mlt_a
+!!$
+!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ real(psb_dpk_), intent(in) :: alpha, beta
+!!$ real(psb_dpk_), intent(in) :: x(:)
+!!$ real(psb_dpk_), intent(in) :: y(:)
+!!$ class(psb_d_vect_oacc), intent(inout) :: z
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ integer(psb_ipk_) :: i, n
+!!$
+!!$ info = 0
+!!$ if (z%is_dev()) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, size(x)
+!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end subroutine d_oacc_mlt_a_2
+!!$
+!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+!!$ use psi_serial_mod
+!!$ use psb_string_mod
+!!$ implicit none
+!!$ real(psb_dpk_), intent(in) :: alpha, beta
+!!$ class(psb_d_base_vect_type), intent(inout) :: x
+!!$ class(psb_d_base_vect_type), intent(inout) :: y
+!!$ class(psb_d_vect_oacc), intent(inout) :: z
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character(len=1), intent(in), optional :: conjgx, conjgy
+!!$ integer(psb_ipk_) :: i, n
+!!$ logical :: conjgx_, conjgy_
+!!$
+!!$ conjgx_ = .false.
+!!$ conjgy_ = .false.
+!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+!!$
+!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+!!$
+!!$ info = 0
+!!$ select type(xx => x)
+!!$ class is (psb_d_vect_oacc)
+!!$ select type (yy => y)
+!!$ class is (psb_d_vect_oacc)
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ if (yy%is_host()) call yy%sync_space()
+!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_dev()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (yy%is_dev()) call yy%sync()
+!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ class default
+!!$ if (x%is_dev()) call x%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ end subroutine d_oacc_mlt_v_2
+!!$
+!!$
+!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ integer(psb_ipk_), intent(in) :: m
+!!$ class(psb_d_base_vect_type), intent(inout) :: x
+!!$ class(psb_d_vect_oacc), intent(inout) :: y
+!!$ real(psb_dpk_), intent(in) :: alpha, beta
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ integer(psb_ipk_) :: nx, ny, i
+!!$
+!!$ info = psb_success_
+!!$
+!!$ select type(xx => x)
+!!$ type is (psb_d_vect_oacc)
+!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ nx = size(xx%v)
+!!$ ny = size(y%v)
+!!$ if ((nx < m) .or. (ny < m)) then
+!!$ info = psb_err_internal_error_
+!!$ else
+!!$ !$acc parallel loop
+!!$ do i = 1, m
+!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
+!!$ end do
+!!$ end if
+!!$ call y%set_dev()
+!!$ class default
+!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
+!!$ call y%axpby(m, alpha, x%v, beta, info)
+!!$ end select
+!!$ end subroutine d_oacc_axpby_v
+!!$
+!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ integer(psb_ipk_), intent(in) :: m
+!!$ real(psb_dpk_), intent(in) :: x(:)
+!!$ class(psb_d_vect_oacc), intent(inout) :: y
+!!$ real(psb_dpk_), intent(in) :: alpha, beta
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, m
+!!$ y%v(i) = alpha * x(i) + beta * y%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end subroutine d_oacc_axpby_a
+!!$
+!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ integer(psb_ipk_), intent(in) :: m
+!!$ class(psb_d_base_vect_type), intent(inout) :: x
+!!$ class(psb_d_base_vect_type), intent(inout) :: y
+!!$ class(psb_d_vect_oacc), intent(inout) :: z
+!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ integer(psb_ipk_) :: nx, ny, nz, i
+!!$ logical :: gpu_done
+!!$
+!!$ info = psb_success_
+!!$ gpu_done = .false.
+!!$
+!!$ select type(xx => x)
+!!$ class is (psb_d_vect_oacc)
+!!$ select type(yy => y)
+!!$ class is (psb_d_vect_oacc)
+!!$ select type(zz => z)
+!!$ class is (psb_d_vect_oacc)
+!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
+!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ nx = size(xx%v)
+!!$ ny = size(yy%v)
+!!$ nz = size(zz%v)
+!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+!!$ info = psb_err_internal_error_
+!!$ else
+!!$ !$acc parallel loop
+!!$ do i = 1, m
+!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+!!$ end do
+!!$ end if
+!!$ call yy%set_dev()
+!!$ call zz%set_dev()
+!!$ gpu_done = .true.
+!!$ end select
+!!$ end select
+!!$ end select
+!!$
+!!$ if (.not. gpu_done) then
+!!$ if (x%is_host()) call x%sync()
+!!$ if (y%is_host()) call y%sync()
+!!$ if (z%is_host()) call z%sync()
+!!$ call y%axpby(m, alpha, x, beta, info)
+!!$ call z%axpby(m, gamma, y, delta, info)
+!!$ end if
+!!$ end subroutine d_oacc_abgdxyz
+!!$
+!!$
+!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_ipk_) :: i, n
+!!$ class(psb_i_base_vect_type) :: idx
+!!$ real(psb_dpk_) :: beta
+!!$ class(psb_d_vect_oacc) :: y
+!!$ integer(psb_ipk_) :: info
+!!$
+!!$ if (.not.allocated(y%combuf)) then
+!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+!!$ return
+!!$ end if
+!!$
+!!$ select type(ii => idx)
+!!$ class is (psb_i_vect_oacc)
+!!$ if (ii%is_host()) call ii%sync_space(info)
+!!$ if (y%is_host()) call y%sync_space()
+!!$
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+!!$ end do
+!!$
+!!$ class default
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+!!$ end do
+!!$ end select
+!!$ end subroutine d_oacc_sctb_buf
+!!$
+!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_ipk_):: i, n
+!!$ class(psb_i_base_vect_type) :: idx
+!!$ real(psb_dpk_) :: beta, x(:)
+!!$ class(psb_d_vect_oacc) :: y
+!!$ integer(psb_ipk_) :: info, ni
+!!$
+!!$ select type(ii => idx)
+!!$ class is (psb_i_vect_oacc)
+!!$ if (ii%is_host()) call ii%sync_space(info)
+!!$ class default
+!!$ call psb_errpush(info, 'd_oacc_sctb_x')
+!!$ return
+!!$ end select
+!!$
+!!$ if (y%is_host()) call y%sync_space()
+!!$
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+!!$ end do
+!!$
+!!$ call y%set_dev()
+!!$ end subroutine d_oacc_sctb_x
+!!$
+!!$
+!!$
+!!$ subroutine d_oacc_sctb(n, idx, x, beta, y)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_ipk_) :: n
+!!$ integer(psb_ipk_) :: idx(:)
+!!$ real(psb_dpk_) :: beta, x(:)
+!!$ class(psb_d_vect_oacc) :: y
+!!$ integer(psb_ipk_) :: info
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ if (n == 0) return
+!!$ if (y%is_dev()) call y%sync_space()
+!!$
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+!!$ end do
+!!$
+!!$ call y%set_host()
+!!$ end subroutine d_oacc_sctb
+!!$
+!!$
+!!$ subroutine d_oacc_gthzbuf(i, n, idx, x)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_ipk_) :: i, n
+!!$ class(psb_i_base_vect_type) :: idx
+!!$ class(psb_d_vect_oacc) :: x
+!!$ integer(psb_ipk_) :: info
+!!$
+!!$ info = 0
+!!$ if (.not.allocated(x%combuf)) then
+!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+!!$ return
+!!$ end if
+!!$
+!!$ select type(ii => idx)
+!!$ class is (psb_i_vect_oacc)
+!!$ if (ii%is_host()) call ii%sync_space(info)
+!!$ class default
+!!$ call psb_errpush(info, 'd_oacc_gthzbuf')
+!!$ return
+!!$ end select
+!!$
+!!$ if (x%is_host()) call x%sync_space()
+!!$
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ x%combuf(i) = x%v(idx%v(i))
+!!$ end do
+!!$ end subroutine d_oacc_gthzbuf
+!!$
+!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_ipk_) :: i, n
+!!$ class(psb_i_base_vect_type):: idx
+!!$ real(psb_dpk_) :: y(:)
+!!$ class(psb_d_vect_oacc):: x
+!!$ integer(psb_ipk_) :: info
+!!$
+!!$ info = 0
+!!$
+!!$ select type(ii => idx)
+!!$ class is (psb_i_vect_oacc)
+!!$ if (ii%is_host()) call ii%sync_space(info)
+!!$ class default
+!!$ call psb_errpush(info, 'd_oacc_gthzv_x')
+!!$ return
+!!$ end select
+!!$
+!!$ if (x%is_host()) call x%sync_space()
+!!$
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y(i) = x%v(idx%v(i))
+!!$ end do
+!!$ end subroutine d_oacc_gthzv_x
+!!$
+!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(in) :: n, dupl
+!!$ class(psb_i_base_vect_type), intent(inout) :: irl
+!!$ class(psb_d_base_vect_type), intent(inout) :: val
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ integer(psb_ipk_) :: i, isz
+!!$ logical :: done_oacc
+!!$
+!!$ info = 0
+!!$ if (psb_errstatus_fatal()) return
+!!$
+!!$ done_oacc = .false.
+!!$ select type(virl => irl)
+!!$ type is (psb_i_vect_oacc)
+!!$ select type(vval => val)
+!!$ type is (psb_d_vect_oacc)
+!!$ if (vval%is_host()) call vval%sync_space()
+!!$ if (virl%is_host()) call virl%sync_space(info)
+!!$ if (x%is_host()) call x%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ x%v(virl%v(i)) = vval%v(i)
+!!$ end do
+!!$ call x%set_dev()
+!!$ done_oacc = .true.
+!!$ end select
+!!$ end select
+!!$
+!!$ if (.not.done_oacc) then
+!!$ select type(virl => irl)
+!!$ type is (psb_i_vect_oacc)
+!!$ if (virl%is_dev()) call virl%sync_space(info)
+!!$ end select
+!!$ select type(vval => val)
+!!$ type is (psb_d_vect_oacc)
+!!$ if (vval%is_dev()) call vval%sync_space()
+!!$ end select
+!!$ call x%ins(n, irl%v, val%v, dupl, info)
+!!$ end if
+!!$
+!!$ if (info /= 0) then
+!!$ call psb_errpush(info, 'oacc_vect_ins')
+!!$ return
+!!$ end if
+!!$
+!!$ end subroutine d_oacc_ins_v
+!!$
+!!$
+!!$
+!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(in) :: n, dupl
+!!$ integer(psb_ipk_), intent(in) :: irl(:)
+!!$ real(psb_dpk_), intent(in) :: val(:)
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ info = 0
+!!$ if (x%is_dev()) call x%sync_space()
+!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
+!!$ call x%set_host()
+!!$ !$acc update device(x%v)
+!!$
+!!$ end subroutine d_oacc_ins_a
+!!$
+!!$
+!!$
+!!$ subroutine d_oacc_bld_mn(x, n)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_mpk_), intent(in) :: n
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_) :: info
+!!$
+!!$ call x%all(n, info)
+!!$ if (info /= 0) then
+!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+!!$ end if
+!!$ call x%set_host()
+!!$ !$acc update device(x%v)
+!!$
+!!$ end subroutine d_oacc_bld_mn
+!!$
+!!$
+!!$ subroutine d_oacc_bld_x(x, this)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ real(psb_dpk_), intent(in) :: this(:)
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_) :: info
+!!$
+!!$ call psb_realloc(size(this), x%v, info)
+!!$ if (info /= 0) then
+!!$ info = psb_err_alloc_request_
+!!$ call psb_errpush(info, 'd_oacc_bld_x', &
+!!$ i_err=(/size(this), izero, izero, izero, izero/))
+!!$ return
+!!$ end if
+!!$
+!!$ x%v(:) = this(:)
+!!$ call x%set_host()
+!!$ !$acc update device(x%v)
+!!$
+!!$ end subroutine d_oacc_bld_x
+!!$
+!!$
+!!$ subroutine d_oacc_asb_m(n, x, info)
+!!$ use psb_base_mod
+!!$ implicit none
+!!$ integer(psb_mpk_), intent(in) :: n
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ integer(psb_mpk_) :: nd
+!!$
+!!$ info = psb_success_
+!!$
+!!$ if (x%is_dev()) then
+!!$ nd = size(x%v)
+!!$ if (nd < n) then
+!!$ call x%sync()
+!!$ call x%psb_d_base_vect_type%asb(n, info)
+!!$ if (info == psb_success_) call x%sync_space()
+!!$ call x%set_host()
+!!$ end if
+!!$ else
+!!$ if (size(x%v) < n) then
+!!$ call x%psb_d_base_vect_type%asb(n, info)
+!!$ if (info == psb_success_) call x%sync_space()
+!!$ call x%set_host()
+!!$ end if
+!!$ end if
+!!$ end subroutine d_oacc_asb_m
+!!$
+!!$
+!!$
+!!$ subroutine d_oacc_set_scal(x, val, first, last)
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ real(psb_dpk_), intent(in) :: val
+!!$ integer(psb_ipk_), optional :: first, last
+!!$
+!!$ integer(psb_ipk_) :: first_, last_
+!!$ first_ = 1
+!!$ last_ = x%get_nrows()
+!!$ if (present(first)) first_ = max(1, first)
+!!$ if (present(last)) last_ = min(last, last_)
+!!$
+!!$ !$acc parallel loop
+!!$ do i = first_, last_
+!!$ x%v(i) = val
+!!$ end do
+!!$ !$acc end parallel loop
+!!$
+!!$ call x%set_dev()
+!!$ end subroutine d_oacc_set_scal
+!!$
+!!$
+!!$
+!!$ subroutine d_oacc_zero(x)
+!!$ use psi_serial_mod
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ call x%set_dev()
+!!$ call x%set_scal(dzero)
+!!$ end subroutine d_oacc_zero
+!!$
+!!$ function d_oacc_get_nrows(x) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(in) :: x
+!!$ integer(psb_ipk_) :: res
+!!$
+!!$ if (allocated(x%v)) res = size(x%v)
+!!$ end function d_oacc_get_nrows
+!!$
+!!$ function d_oacc_get_fmt() result(res)
+!!$ implicit none
+!!$ character(len=5) :: res
+!!$ res = "dOACC"
+!!$
+!!$ end function d_oacc_get_fmt
+!!$
+!!$ function d_oacc_vect_dot(n, x, y) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ class(psb_d_base_vect_type), intent(inout) :: y
+!!$ integer(psb_ipk_), intent(in) :: n
+!!$ real(psb_dpk_) :: res
+!!$ real(psb_dpk_), external :: ddot
+!!$ integer(psb_ipk_) :: info
+!!$ integer(psb_ipk_) :: i
+!!$
+!!$ res = dzero
+!!$
+!!$ select type(yy => y)
+!!$ type is (psb_d_base_vect_type)
+!!$ if (x%is_dev()) call x%sync()
+!!$ res = ddot(n, x%v, 1, yy%v, 1)
+!!$ type is (psb_d_vect_oacc)
+!!$ if (x%is_host()) call x%sync()
+!!$ if (yy%is_host()) call yy%sync()
+!!$
+!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
+!!$ do i = 1, n
+!!$ res = res + x%v(i) * yy%v(i)
+!!$ end do
+!!$ !$acc end parallel loop
+!!$
+!!$ class default
+!!$ call x%sync()
+!!$ res = y%dot(n, x%v)
+!!$ end select
+!!$
+!!$ end function d_oacc_vect_dot
+!!$
+!!$
+!!$
+!!$
+!!$ function d_oacc_dot_a(n, x, y) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ real(psb_dpk_), intent(in) :: y(:)
+!!$ integer(psb_ipk_), intent(in) :: n
+!!$ real(psb_dpk_) :: res
+!!$ real(psb_dpk_), external :: ddot
+!!$
+!!$ if (x%is_dev()) call x%sync()
+!!$ res = ddot(n, y, 1, x%v, 1)
+!!$
+!!$ end function d_oacc_dot_a
+!!$
+!!$ ! subroutine d_oacc_set_vect(x,y)
+!!$ ! implicit none
+!!$ ! class(psb_d_vect_oacc), intent(inout) :: x
+!!$ ! real(psb_dpk_), intent(in) :: y(:)
+!!$ ! integer(psb_ipk_) :: info
+!!$
+!!$ ! if (size(x%v) /= size(y)) then
+!!$ ! call x%free(info)
+!!$ ! call x%all(size(y),info)
+!!$ ! end if
+!!$ ! x%v(:) = y(:)
+!!$ ! call x%set_host()
+!!$ ! end subroutine d_oacc_set_vect
+!!$
+!!$ subroutine d_oacc_to_dev(v)
+!!$ implicit none
+!!$ real(psb_dpk_) :: v(:)
+!!$ !$acc update device(v)
+!!$ end subroutine d_oacc_to_dev
+!!$
+!!$ subroutine d_oacc_to_host(v)
+!!$ implicit none
+!!$ real(psb_dpk_) :: v(:)
+!!$ !$acc update self(v)
+!!$ end subroutine d_oacc_to_host
+!!$
subroutine d_oacc_sync_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -871,96 +869,96 @@ contains
res = (x%state == is_sync)
end function d_oacc_is_sync
-
- subroutine d_oacc_vect_all(n, x, info)
- use psi_serial_mod
- use psb_realloc_mod
- implicit none
- integer(psb_ipk_), intent(in) :: n
- class(psb_d_vect_oacc), intent(out) :: x
- integer(psb_ipk_), intent(out) :: info
-
- call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
- if (info /= 0) then
- info = psb_err_alloc_request_
- call psb_errpush(info, 'd_oacc_all', &
- i_err=(/n, n, n, n, n/))
- end if
- end subroutine d_oacc_vect_all
-
-
- subroutine d_oacc_vect_free(x, info)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(out) :: info
- info = 0
- if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
- deallocate(x%v, stat=info)
- end if
-
- end subroutine d_oacc_vect_free
-
- function oacc_get_size(x) result(res)
- implicit none
- class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: res
-
- if (x%is_dev()) call x%sync()
- res = size(x%v)
- end function oacc_get_size
-
-
- subroutine initialize(N)
- integer(psb_ipk_) :: N
- integer(psb_ipk_) :: i
- allocate(v1(N),v2(N),p(N))
- !$acc enter data create(v1,v2,p)
- !$acc parallel
- !$acc loop
- do i=1,n
- v1(i) = i
- v2(i) = n+i
- end do
- !$acc end parallel
- end subroutine initialize
- subroutine finalize_dev()
- !$acc exit data delete(v1,v2,p)
- end subroutine finalize_dev
- subroutine finalize_host()
- deallocate(v1,v2,p)
- end subroutine finalize_host
- subroutine to_dev()
- !$acc update device(v1,v2)
- end subroutine to_dev
- subroutine to_host()
- !$acc update self(v1,v2)
- end subroutine to_host
- function d_dot(N) result(res)
- real(kind(1.d0)) :: res
- integer(psb_ipk_) :: i,N
- real(kind(1.d0)) :: t1,t2,t3
- res = 0.0d0
- !$acc parallel
- !$acc loop reduction(+:res)
- do i=1,N
- res = res + v1(i) * v2(i)
- end do
- !$acc end parallel
-
- end function d_dot
- function h_dot(N) result(res)
- integer(psb_ipk_) :: i,N
- real(kind(1.d0)) :: t1,t2,t3,res
- res = 0.0d0
- do i=1,N
- res = res + v1(i) * v2(i)
- end do
- end function h_dot
-
-end module psb_d_oacc_vect_mod
\ No newline at end of file
+!!$
+!!$ subroutine d_oacc_vect_all(n, x, info)
+!!$ use psi_serial_mod
+!!$ use psb_realloc_mod
+!!$ implicit none
+!!$ integer(psb_ipk_), intent(in) :: n
+!!$ class(psb_d_vect_oacc), intent(out) :: x
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ call psb_realloc(n, x%v, info)
+!!$ if (info == 0) then
+!!$ call x%set_host()
+!!$ !$acc enter data create(x%v)
+!!$ call x%sync_space()
+!!$ end if
+!!$ if (info /= 0) then
+!!$ info = psb_err_alloc_request_
+!!$ call psb_errpush(info, 'd_oacc_all', &
+!!$ i_err=(/n, n, n, n, n/))
+!!$ end if
+!!$ end subroutine d_oacc_vect_all
+!!$
+!!$
+!!$ subroutine d_oacc_vect_free(x, info)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ info = 0
+!!$ if (allocated(x%v)) then
+!!$ !$acc exit data delete(x%v) finalize
+!!$ deallocate(x%v, stat=info)
+!!$ end if
+!!$
+!!$ end subroutine d_oacc_vect_free
+!!$
+!!$ function oacc_get_size(x) result(res)
+!!$ implicit none
+!!$ class(psb_d_vect_oacc), intent(inout) :: x
+!!$ integer(psb_ipk_) :: res
+!!$
+!!$ if (x%is_dev()) call x%sync()
+!!$ res = size(x%v)
+!!$ end function oacc_get_size
+!!$
+!!$
+!!$ subroutine initialize(N)
+!!$ integer(psb_ipk_) :: N
+!!$ integer(psb_ipk_) :: i
+!!$ allocate(v1(N),v2(N),p(N))
+!!$ !$acc enter data create(v1,v2,p)
+!!$ !$acc parallel
+!!$ !$acc loop
+!!$ do i=1,n
+!!$ v1(i) = i
+!!$ v2(i) = n+i
+!!$ end do
+!!$ !$acc end parallel
+!!$ end subroutine initialize
+!!$ subroutine finalize_dev()
+!!$ !$acc exit data delete(v1,v2,p)
+!!$ end subroutine finalize_dev
+!!$ subroutine finalize_host()
+!!$ deallocate(v1,v2,p)
+!!$ end subroutine finalize_host
+!!$ subroutine to_dev()
+!!$ !$acc update device(v1,v2)
+!!$ end subroutine to_dev
+!!$ subroutine to_host()
+!!$ !$acc update self(v1,v2)
+!!$ end subroutine to_host
+!!$ function d_dot(N) result(res)
+!!$ real(kind(1.d0)) :: res
+!!$ integer(psb_ipk_) :: i,N
+!!$ real(kind(1.d0)) :: t1,t2,t3
+!!$ res = 0.0d0
+!!$ !$acc parallel
+!!$ !$acc loop reduction(+:res)
+!!$ do i=1,N
+!!$ res = res + v1(i) * v2(i)
+!!$ end do
+!!$ !$acc end parallel
+!!$
+!!$ end function d_dot
+!!$ function h_dot(N) result(res)
+!!$ integer(psb_ipk_) :: i,N
+!!$ real(kind(1.d0)) :: t1,t2,t3,res
+!!$ res = 0.0d0
+!!$ do i=1,N
+!!$ res = res + v1(i) * v2(i)
+!!$ end do
+!!$ end function h_dot
+!!$
+end module psb_d_oacc_vect_mod
From e0d7091ecc81f3727ae4f75e170dea82d1948e13 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 9 Jul 2024 10:04:32 +0200
Subject: [PATCH 04/86] Makefile in openacc/impl
---
openacc/impl/Makefile | 29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+)
create mode 100755 openacc/impl/Makefile
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
new file mode 100755
index 00000000..fff48657
--- /dev/null
+++ b/openacc/impl/Makefile
@@ -0,0 +1,29 @@
+include ../../Make.inc
+LIBDIR=../../lib
+INCDIR=../../include
+MODDIR=../../modules
+#
+# Compilers and such
+#
+#CCOPT= -g
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
+LIBNAME=libpsb_openacc.a
+
+OBJS= psb_d_oacc_csr_vect_mv.o
+
+objs: $(OBJS)
+
+lib: objs
+ ar cur ../$(LIBNAME) $(OBJS)
+
+clean:
+ /bin/rm -f $(OBJS)
+
+.c.o:
+ $(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+.f90.o:
+ $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
+.F90.o:
+ $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+.cpp.o:
+ $(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
From 6be998ac66c5a0869f10e58ace3ee2f2aec5cb8e Mon Sep 17 00:00:00 2001
From: tloloum
Date: Tue, 9 Jul 2024 11:22:28 +0200
Subject: [PATCH 05/86] oacc_env_mod
---
openacc/Makefile | 3 ++-
openacc/psb_oacc_env_mod.F90 | 18 ++++++++++++++++++
openacc/psb_oacc_mod.F90 | 2 ++
3 files changed, 22 insertions(+), 1 deletion(-)
create mode 100644 openacc/psb_oacc_env_mod.F90
diff --git a/openacc/Makefile b/openacc/Makefile
index 3590249c..aae571ad 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -11,7 +11,7 @@ OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
-IMPLDIR=./impl # Adding the impl directory
+IMPLDIR=./impl
# Include and library paths
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR)
@@ -20,6 +20,7 @@ LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
# Source files
FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
+ psb_oacc_env_mod.o \
impl/psb_d_oacc_csr_vect_mv.o
# Library name
diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90
new file mode 100644
index 00000000..83c9426d
--- /dev/null
+++ b/openacc/psb_oacc_env_mod.F90
@@ -0,0 +1,18 @@
+module psb_oacc_env_mod
+contains
+
+ subroutine psb_oacc_init(ctxt, dev)
+ use psb_penv_mod
+ use psb_const_mod
+ use psb_error_mod
+ type(psb_ctxt_type), intent(in) :: ctxt
+ integer, intent(in), optional :: dev
+
+ end subroutine psb_oacc_init
+
+ subroutine psb_oacc_exit()
+ integer :: res
+
+ end subroutine psb_oacc_exit
+
+end module psb_oacc_env_mod
diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90
index ce5e85f9..fe827db8 100644
--- a/openacc/psb_oacc_mod.F90
+++ b/openacc/psb_oacc_mod.F90
@@ -1,6 +1,8 @@
module psb_oacc_mod
use psb_const_mod
+ use psb_oacc_env_mod
+
use psb_d_oacc_vect_mod
use psb_d_oacc_csr_mat_mod
From 1f0e5918270e22d1f3dcf0f4bea0e299192ee2c5 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 9 Jul 2024 13:16:36 +0200
Subject: [PATCH 06/86] Reworked oacc_vect_mod. oacc_mlt_v_X generates ICE, to
be fixed
---
openacc/impl/Makefile | 2 +-
openacc/impl/psb_d_oacc_mlt_v.f90 | 31 +
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 55 +
openacc/psb_d_oacc_vect_mod.F90 | 1487 ++++++++++++++-------------
4 files changed, 831 insertions(+), 744 deletions(-)
create mode 100644 openacc/impl/psb_d_oacc_mlt_v.f90
create mode 100644 openacc/impl/psb_d_oacc_mlt_v_2.f90
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index fff48657..7bab6654 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -9,7 +9,7 @@ MODDIR=../../modules
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
LIBNAME=libpsb_openacc.a
-OBJS= psb_d_oacc_csr_vect_mv.o
+OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o
objs: $(OBJS)
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
new file mode 100644
index 00000000..a4eb6660
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -0,0 +1,31 @@
+
+subroutine d_oacc_mlt_v(x, y, info)
+ use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v
+
+ implicit none
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+!!$ n = min(x%get_nrows(), y%get_nrows())
+!!$ select type(xx => x)
+!!$ type is (psb_d_base_vect_type)
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end select
+end subroutine d_oacc_mlt_v
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
new file mode 100644
index 00000000..b59b4f56
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -0,0 +1,55 @@
+subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2
+ use psb_string_mod
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
+
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+
+ info = 0
+!!$ select type(xx => x)
+!!$ class is (psb_d_vect_oacc)
+!!$ select type (yy => y)
+!!$ class is (psb_d_vect_oacc)
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ if (yy%is_host()) call yy%sync_space()
+!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_dev()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (yy%is_dev()) call yy%sync()
+!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ class default
+!!$ if (x%is_dev()) call x%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+end subroutine d_oacc_mlt_v_2
+
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index eda804ce..3139f9b8 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -5,63 +5,87 @@ module psb_d_oacc_vect_mod
use psb_d_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
-
+
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
-
+
type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc
integer :: state = is_host
contains
-!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows
-!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt
-!!$
-!!$ procedure, pass(x) :: all => d_oacc_vect_all
-!!$ procedure, pass(x) :: zero => d_oacc_zero
-!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m
+ procedure, pass(x) :: get_nrows => d_oacc_get_nrows
+ procedure, nopass :: get_fmt => d_oacc_get_fmt
+
+ procedure, pass(x) :: all => d_oacc_vect_all
+ procedure, pass(x) :: zero => d_oacc_zero
+ procedure, pass(x) :: asb_m => d_oacc_asb_m
procedure, pass(x) :: sync => d_oacc_sync
procedure, pass(x) :: sync_space => d_oacc_sync_space
-!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x
-!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn
-!!$ procedure, pass(x) :: free => d_oacc_vect_free
-!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a
-!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v
+ procedure, pass(x) :: bld_x => d_oacc_bld_x
+ procedure, pass(x) :: bld_mn => d_oacc_bld_mn
+ procedure, pass(x) :: free => d_oacc_vect_free
+ procedure, pass(x) :: ins_a => d_oacc_ins_a
+ procedure, pass(x) :: ins_v => d_oacc_ins_v
procedure, pass(x) :: is_host => d_oacc_is_host
procedure, pass(x) :: is_dev => d_oacc_is_dev
procedure, pass(x) :: is_sync => d_oacc_is_sync
procedure, pass(x) :: set_host => d_oacc_set_host
procedure, pass(x) :: set_dev => d_oacc_set_dev
procedure, pass(x) :: set_sync => d_oacc_set_sync
-!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal
-!!$
-!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
-!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
-!!$ procedure, pass(y) :: sctb => d_oacc_sctb
-!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x
-!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
-!!$
-!!$ procedure, pass(x) :: get_size => oacc_get_size
-!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot
-!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a
-!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v
-!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a
-!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
-!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v
-!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a
-!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
-!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
-!!$ procedure, pass(x) :: scal => d_oacc_scal
-!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2
-!!$ procedure, pass(x) :: amax => d_oacc_amax
-!!$ procedure, pass(x) :: asum => d_oacc_asum
+ procedure, pass(x) :: set_scal => d_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
+ procedure, pass(y) :: sctb => d_oacc_sctb
+ procedure, pass(y) :: sctb_x => d_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => oacc_get_size
+ procedure, pass(x) :: dot_v => d_oacc_vect_dot
+ procedure, pass(x) :: dot_a => d_oacc_dot_a
+ procedure, pass(y) :: axpby_v => d_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => d_oacc_axpby_a
+ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
+ procedure, pass(y) :: mlt_a => d_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => d_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
+ procedure, pass(x) :: scal => d_oacc_scal
+ procedure, pass(x) :: nrm2 => d_oacc_nrm2
+ procedure, pass(x) :: amax => d_oacc_amax
+ procedure, pass(x) :: asum => d_oacc_asum
procedure, pass(x) :: absval1 => d_oacc_absval1
-!!$ procedure, pass(x) :: absval2 => d_oacc_absval2
+ procedure, pass(x) :: absval2 => d_oacc_absval2
end type psb_d_vect_oacc
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
+ interface
+ subroutine d_oacc_mlt_v(x, y, info)
+ import
+ implicit none
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine d_oacc_mlt_v
+ end interface
+
+
+ interface
+ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ import
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ end subroutine d_oacc_mlt_v_2
+ end interface
+
contains
subroutine d_oacc_absval1(x)
@@ -73,106 +97,140 @@ contains
n = size(x%v)
!$acc parallel loop
do i = 1, n
- x%v(i) = abs(x%v(i))
+ x%v(i) = abs(x%v(i))
end do
call x%set_dev()
end subroutine d_oacc_absval1
-!!$ subroutine d_oacc_absval2(x, y)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ class(psb_d_base_vect_type), intent(inout) :: y
-!!$ integer(psb_ipk_) :: n
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ n = min(size(x%v), size(y%v))
-!!$ select type (yy => y)
-!!$ class is (psb_d_vect_oacc)
-!!$ if (x%is_host()) call x%sync()
-!!$ if (yy%is_host()) call yy%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ yy%v(i) = abs(x%v(i))
-!!$ end do
-!!$ class default
-!!$ if (x%is_dev()) call x%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ call x%psb_d_base_vect_type%absval(y)
-!!$ end select
-!!$ end subroutine d_oacc_absval2
-!!$
-!!$ subroutine d_oacc_scal(alpha, x)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ real(psb_dpk_), intent(in) :: alpha
-!!$ integer(psb_ipk_) :: info
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ if (x%is_host()) call x%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, size(x%v)
-!!$ x%v(i) = alpha * x%v(i)
-!!$ end do
-!!$ call x%set_dev()
-!!$ end subroutine d_oacc_scal
-!!$
-!!$ function d_oacc_nrm2(n, x) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(in) :: n
-!!$ real(psb_dpk_) :: res
-!!$ integer(psb_ipk_) :: info
-!!$ real(psb_dpk_) :: sum
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ if (x%is_host()) call x%sync_space()
-!!$ sum = 0.0
-!!$ !$acc parallel loop reduction(+:sum)
-!!$ do i = 1, n
-!!$ sum = sum + x%v(i) * x%v(i)
-!!$ end do
-!!$ res = sqrt(sum)
-!!$ end function d_oacc_nrm2
-!!$
-!!$ function d_oacc_amax(n, x) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(in) :: n
-!!$ real(psb_dpk_) :: res
-!!$ integer(psb_ipk_) :: info
-!!$ real(psb_dpk_) :: max_val
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ if (x%is_host()) call x%sync_space()
-!!$ max_val = -huge(0.0)
-!!$ !$acc parallel loop reduction(max:max_val)
-!!$ do i = 1, n
-!!$ if (x%v(i) > max_val) max_val = x%v(i)
-!!$ end do
-!!$ res = max_val
-!!$ end function d_oacc_amax
-!!$
-!!$ function d_oacc_asum(n, x) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(in) :: n
-!!$ real(psb_dpk_) :: res
-!!$ integer(psb_ipk_) :: info
-!!$ real(psb_dpk_) :: sum
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ if (x%is_host()) call x%sync_space()
-!!$ sum = 0.0
-!!$ !$acc parallel loop reduction(+:sum)
-!!$ do i = 1, n
-!!$ sum = sum + abs(x%v(i))
-!!$ end do
-!!$ res = sum
-!!$ end function d_oacc_asum
-!!$
-!!$
+ subroutine d_oacc_absval2(x, y)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+
+ n = min(size(x%v), size(y%v))
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+ !$acc parallel loop
+ do i = 1, n
+ yy%v(i) = abs(x%v(i))
+ end do
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call x%psb_d_base_vect_type%absval(y)
+ end select
+ end subroutine d_oacc_absval2
+
+ subroutine d_oacc_scal(alpha, x)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ real(psb_dpk_), intent(in) :: alpha
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x%v)
+ x%v(i) = alpha * x%v(i)
+ end do
+ call x%set_dev()
+ end subroutine d_oacc_scal
+
+ function d_oacc_nrm2(n, x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + x%v(i) * x%v(i)
+ end do
+ res = sqrt(sum)
+ end function d_oacc_nrm2
+
+ function d_oacc_amax(n, x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: max_val
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (x%v(i) > max_val) max_val = x%v(i)
+ end do
+ res = max_val
+ end function d_oacc_amax
+
+ function d_oacc_asum(n, x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))
+ end do
+ res = sum
+ end function d_oacc_asum
+
+
+ subroutine d_oacc_mlt_a(x, y, info)
+ implicit none
+ real(psb_dpk_), intent(in) :: x(:)
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (y%is_dev()) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ y%v(i) = y%v(i) * x(i)
+ end do
+ call y%set_host()
+ end subroutine d_oacc_mlt_a
+
+ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:)
+ real(psb_dpk_), intent(in) :: y(:)
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (z%is_dev()) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end subroutine d_oacc_mlt_a_2
+
+
!!$ subroutine d_oacc_mlt_v(x, y, info)
-!!$ use psi_serial_mod
!!$ implicit none
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_vect_oacc), intent(inout) :: y
@@ -201,42 +259,6 @@ contains
!!$ end select
!!$ end subroutine d_oacc_mlt_v
!!$
-!!$ subroutine d_oacc_mlt_a(x, y, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ real(psb_dpk_), intent(in) :: x(:)
-!!$ class(psb_d_vect_oacc), intent(inout) :: y
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ integer(psb_ipk_) :: i, n
-!!$
-!!$ info = 0
-!!$ if (y%is_dev()) call y%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, size(x)
-!!$ y%v(i) = y%v(i) * x(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end subroutine d_oacc_mlt_a
-!!$
-!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ real(psb_dpk_), intent(in) :: alpha, beta
-!!$ real(psb_dpk_), intent(in) :: x(:)
-!!$ real(psb_dpk_), intent(in) :: y(:)
-!!$ class(psb_d_vect_oacc), intent(inout) :: z
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ integer(psb_ipk_) :: i, n
-!!$
-!!$ info = 0
-!!$ if (z%is_dev()) call z%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, size(x)
-!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end subroutine d_oacc_mlt_a_2
-!!$
!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
@@ -291,514 +313,513 @@ contains
!!$ call z%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v_2
-!!$
-!!$
-!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ integer(psb_ipk_), intent(in) :: m
-!!$ class(psb_d_base_vect_type), intent(inout) :: x
-!!$ class(psb_d_vect_oacc), intent(inout) :: y
-!!$ real(psb_dpk_), intent(in) :: alpha, beta
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ integer(psb_ipk_) :: nx, ny, i
-!!$
-!!$ info = psb_success_
-!!$
-!!$ select type(xx => x)
-!!$ type is (psb_d_vect_oacc)
-!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ nx = size(xx%v)
-!!$ ny = size(y%v)
-!!$ if ((nx < m) .or. (ny < m)) then
-!!$ info = psb_err_internal_error_
-!!$ else
-!!$ !$acc parallel loop
-!!$ do i = 1, m
-!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
-!!$ end do
-!!$ end if
-!!$ call y%set_dev()
-!!$ class default
-!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
-!!$ call y%axpby(m, alpha, x%v, beta, info)
-!!$ end select
-!!$ end subroutine d_oacc_axpby_v
-!!$
-!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ integer(psb_ipk_), intent(in) :: m
-!!$ real(psb_dpk_), intent(in) :: x(:)
-!!$ class(psb_d_vect_oacc), intent(inout) :: y
-!!$ real(psb_dpk_), intent(in) :: alpha, beta
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, m
-!!$ y%v(i) = alpha * x(i) + beta * y%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end subroutine d_oacc_axpby_a
-!!$
-!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ integer(psb_ipk_), intent(in) :: m
-!!$ class(psb_d_base_vect_type), intent(inout) :: x
-!!$ class(psb_d_base_vect_type), intent(inout) :: y
-!!$ class(psb_d_vect_oacc), intent(inout) :: z
-!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ integer(psb_ipk_) :: nx, ny, nz, i
-!!$ logical :: gpu_done
-!!$
-!!$ info = psb_success_
-!!$ gpu_done = .false.
-!!$
-!!$ select type(xx => x)
-!!$ class is (psb_d_vect_oacc)
-!!$ select type(yy => y)
-!!$ class is (psb_d_vect_oacc)
-!!$ select type(zz => z)
-!!$ class is (psb_d_vect_oacc)
-!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
-!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ nx = size(xx%v)
-!!$ ny = size(yy%v)
-!!$ nz = size(zz%v)
-!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
-!!$ info = psb_err_internal_error_
-!!$ else
-!!$ !$acc parallel loop
-!!$ do i = 1, m
-!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
-!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
-!!$ end do
-!!$ end if
-!!$ call yy%set_dev()
-!!$ call zz%set_dev()
-!!$ gpu_done = .true.
-!!$ end select
-!!$ end select
-!!$ end select
-!!$
-!!$ if (.not. gpu_done) then
-!!$ if (x%is_host()) call x%sync()
-!!$ if (y%is_host()) call y%sync()
-!!$ if (z%is_host()) call z%sync()
-!!$ call y%axpby(m, alpha, x, beta, info)
-!!$ call z%axpby(m, gamma, y, delta, info)
-!!$ end if
-!!$ end subroutine d_oacc_abgdxyz
-!!$
-!!$
-!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_ipk_) :: i, n
-!!$ class(psb_i_base_vect_type) :: idx
-!!$ real(psb_dpk_) :: beta
-!!$ class(psb_d_vect_oacc) :: y
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ if (.not.allocated(y%combuf)) then
-!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
-!!$ return
-!!$ end if
-!!$
-!!$ select type(ii => idx)
-!!$ class is (psb_i_vect_oacc)
-!!$ if (ii%is_host()) call ii%sync_space(info)
-!!$ if (y%is_host()) call y%sync_space()
-!!$
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
-!!$ end do
-!!$
-!!$ class default
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
-!!$ end do
-!!$ end select
-!!$ end subroutine d_oacc_sctb_buf
-!!$
-!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_ipk_):: i, n
-!!$ class(psb_i_base_vect_type) :: idx
-!!$ real(psb_dpk_) :: beta, x(:)
-!!$ class(psb_d_vect_oacc) :: y
-!!$ integer(psb_ipk_) :: info, ni
-!!$
-!!$ select type(ii => idx)
-!!$ class is (psb_i_vect_oacc)
-!!$ if (ii%is_host()) call ii%sync_space(info)
-!!$ class default
-!!$ call psb_errpush(info, 'd_oacc_sctb_x')
-!!$ return
-!!$ end select
-!!$
-!!$ if (y%is_host()) call y%sync_space()
-!!$
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
-!!$ end do
-!!$
-!!$ call y%set_dev()
-!!$ end subroutine d_oacc_sctb_x
-!!$
-!!$
-!!$
-!!$ subroutine d_oacc_sctb(n, idx, x, beta, y)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_ipk_) :: n
-!!$ integer(psb_ipk_) :: idx(:)
-!!$ real(psb_dpk_) :: beta, x(:)
-!!$ class(psb_d_vect_oacc) :: y
-!!$ integer(psb_ipk_) :: info
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ if (n == 0) return
-!!$ if (y%is_dev()) call y%sync_space()
-!!$
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
-!!$ end do
-!!$
-!!$ call y%set_host()
-!!$ end subroutine d_oacc_sctb
-!!$
-!!$
-!!$ subroutine d_oacc_gthzbuf(i, n, idx, x)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_ipk_) :: i, n
-!!$ class(psb_i_base_vect_type) :: idx
-!!$ class(psb_d_vect_oacc) :: x
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ info = 0
-!!$ if (.not.allocated(x%combuf)) then
-!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
-!!$ return
-!!$ end if
-!!$
-!!$ select type(ii => idx)
-!!$ class is (psb_i_vect_oacc)
-!!$ if (ii%is_host()) call ii%sync_space(info)
-!!$ class default
-!!$ call psb_errpush(info, 'd_oacc_gthzbuf')
-!!$ return
-!!$ end select
-!!$
-!!$ if (x%is_host()) call x%sync_space()
-!!$
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ x%combuf(i) = x%v(idx%v(i))
-!!$ end do
-!!$ end subroutine d_oacc_gthzbuf
-!!$
-!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_ipk_) :: i, n
-!!$ class(psb_i_base_vect_type):: idx
-!!$ real(psb_dpk_) :: y(:)
-!!$ class(psb_d_vect_oacc):: x
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ info = 0
-!!$
-!!$ select type(ii => idx)
-!!$ class is (psb_i_vect_oacc)
-!!$ if (ii%is_host()) call ii%sync_space(info)
-!!$ class default
-!!$ call psb_errpush(info, 'd_oacc_gthzv_x')
-!!$ return
-!!$ end select
-!!$
-!!$ if (x%is_host()) call x%sync_space()
-!!$
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y(i) = x%v(idx%v(i))
-!!$ end do
-!!$ end subroutine d_oacc_gthzv_x
-!!$
-!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(in) :: n, dupl
-!!$ class(psb_i_base_vect_type), intent(inout) :: irl
-!!$ class(psb_d_base_vect_type), intent(inout) :: val
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ integer(psb_ipk_) :: i, isz
-!!$ logical :: done_oacc
-!!$
-!!$ info = 0
-!!$ if (psb_errstatus_fatal()) return
-!!$
-!!$ done_oacc = .false.
-!!$ select type(virl => irl)
-!!$ type is (psb_i_vect_oacc)
-!!$ select type(vval => val)
-!!$ type is (psb_d_vect_oacc)
-!!$ if (vval%is_host()) call vval%sync_space()
-!!$ if (virl%is_host()) call virl%sync_space(info)
-!!$ if (x%is_host()) call x%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ x%v(virl%v(i)) = vval%v(i)
-!!$ end do
-!!$ call x%set_dev()
-!!$ done_oacc = .true.
-!!$ end select
-!!$ end select
-!!$
-!!$ if (.not.done_oacc) then
-!!$ select type(virl => irl)
-!!$ type is (psb_i_vect_oacc)
-!!$ if (virl%is_dev()) call virl%sync_space(info)
-!!$ end select
-!!$ select type(vval => val)
-!!$ type is (psb_d_vect_oacc)
-!!$ if (vval%is_dev()) call vval%sync_space()
-!!$ end select
-!!$ call x%ins(n, irl%v, val%v, dupl, info)
-!!$ end if
-!!$
-!!$ if (info /= 0) then
-!!$ call psb_errpush(info, 'oacc_vect_ins')
-!!$ return
-!!$ end if
-!!$
-!!$ end subroutine d_oacc_ins_v
-!!$
-!!$
-!!$
-!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(in) :: n, dupl
-!!$ integer(psb_ipk_), intent(in) :: irl(:)
-!!$ real(psb_dpk_), intent(in) :: val(:)
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ info = 0
-!!$ if (x%is_dev()) call x%sync_space()
-!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
-!!$ call x%set_host()
-!!$ !$acc update device(x%v)
-!!$
-!!$ end subroutine d_oacc_ins_a
-!!$
-!!$
-!!$
-!!$ subroutine d_oacc_bld_mn(x, n)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_mpk_), intent(in) :: n
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ call x%all(n, info)
-!!$ if (info /= 0) then
-!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
-!!$ end if
-!!$ call x%set_host()
-!!$ !$acc update device(x%v)
-!!$
-!!$ end subroutine d_oacc_bld_mn
-!!$
-!!$
-!!$ subroutine d_oacc_bld_x(x, this)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ real(psb_dpk_), intent(in) :: this(:)
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ call psb_realloc(size(this), x%v, info)
-!!$ if (info /= 0) then
-!!$ info = psb_err_alloc_request_
-!!$ call psb_errpush(info, 'd_oacc_bld_x', &
-!!$ i_err=(/size(this), izero, izero, izero, izero/))
-!!$ return
-!!$ end if
-!!$
-!!$ x%v(:) = this(:)
-!!$ call x%set_host()
-!!$ !$acc update device(x%v)
-!!$
-!!$ end subroutine d_oacc_bld_x
-!!$
-!!$
-!!$ subroutine d_oacc_asb_m(n, x, info)
-!!$ use psb_base_mod
-!!$ implicit none
-!!$ integer(psb_mpk_), intent(in) :: n
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ integer(psb_mpk_) :: nd
-!!$
-!!$ info = psb_success_
-!!$
-!!$ if (x%is_dev()) then
-!!$ nd = size(x%v)
-!!$ if (nd < n) then
-!!$ call x%sync()
-!!$ call x%psb_d_base_vect_type%asb(n, info)
-!!$ if (info == psb_success_) call x%sync_space()
-!!$ call x%set_host()
-!!$ end if
-!!$ else
-!!$ if (size(x%v) < n) then
-!!$ call x%psb_d_base_vect_type%asb(n, info)
-!!$ if (info == psb_success_) call x%sync_space()
-!!$ call x%set_host()
-!!$ end if
-!!$ end if
-!!$ end subroutine d_oacc_asb_m
-!!$
-!!$
-!!$
-!!$ subroutine d_oacc_set_scal(x, val, first, last)
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ real(psb_dpk_), intent(in) :: val
-!!$ integer(psb_ipk_), optional :: first, last
-!!$
-!!$ integer(psb_ipk_) :: first_, last_
-!!$ first_ = 1
-!!$ last_ = x%get_nrows()
-!!$ if (present(first)) first_ = max(1, first)
-!!$ if (present(last)) last_ = min(last, last_)
-!!$
-!!$ !$acc parallel loop
-!!$ do i = first_, last_
-!!$ x%v(i) = val
-!!$ end do
-!!$ !$acc end parallel loop
-!!$
-!!$ call x%set_dev()
-!!$ end subroutine d_oacc_set_scal
-!!$
-!!$
-!!$
-!!$ subroutine d_oacc_zero(x)
-!!$ use psi_serial_mod
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ call x%set_dev()
-!!$ call x%set_scal(dzero)
-!!$ end subroutine d_oacc_zero
-!!$
-!!$ function d_oacc_get_nrows(x) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(in) :: x
-!!$ integer(psb_ipk_) :: res
-!!$
-!!$ if (allocated(x%v)) res = size(x%v)
-!!$ end function d_oacc_get_nrows
-!!$
-!!$ function d_oacc_get_fmt() result(res)
-!!$ implicit none
-!!$ character(len=5) :: res
-!!$ res = "dOACC"
-!!$
-!!$ end function d_oacc_get_fmt
-!!$
-!!$ function d_oacc_vect_dot(n, x, y) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ class(psb_d_base_vect_type), intent(inout) :: y
-!!$ integer(psb_ipk_), intent(in) :: n
-!!$ real(psb_dpk_) :: res
-!!$ real(psb_dpk_), external :: ddot
-!!$ integer(psb_ipk_) :: info
-!!$ integer(psb_ipk_) :: i
-!!$
-!!$ res = dzero
-!!$
-!!$ select type(yy => y)
-!!$ type is (psb_d_base_vect_type)
-!!$ if (x%is_dev()) call x%sync()
-!!$ res = ddot(n, x%v, 1, yy%v, 1)
-!!$ type is (psb_d_vect_oacc)
-!!$ if (x%is_host()) call x%sync()
-!!$ if (yy%is_host()) call yy%sync()
-!!$
-!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
-!!$ do i = 1, n
-!!$ res = res + x%v(i) * yy%v(i)
-!!$ end do
-!!$ !$acc end parallel loop
-!!$
-!!$ class default
-!!$ call x%sync()
-!!$ res = y%dot(n, x%v)
-!!$ end select
-!!$
-!!$ end function d_oacc_vect_dot
-!!$
-!!$
-!!$
-!!$
-!!$ function d_oacc_dot_a(n, x, y) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ real(psb_dpk_), intent(in) :: y(:)
-!!$ integer(psb_ipk_), intent(in) :: n
-!!$ real(psb_dpk_) :: res
-!!$ real(psb_dpk_), external :: ddot
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ res = ddot(n, y, 1, x%v, 1)
-!!$
-!!$ end function d_oacc_dot_a
-!!$
-!!$ ! subroutine d_oacc_set_vect(x,y)
-!!$ ! implicit none
-!!$ ! class(psb_d_vect_oacc), intent(inout) :: x
-!!$ ! real(psb_dpk_), intent(in) :: y(:)
-!!$ ! integer(psb_ipk_) :: info
-!!$
-!!$ ! if (size(x%v) /= size(y)) then
-!!$ ! call x%free(info)
-!!$ ! call x%all(size(y),info)
-!!$ ! end if
-!!$ ! x%v(:) = y(:)
-!!$ ! call x%set_host()
-!!$ ! end subroutine d_oacc_set_vect
-!!$
-!!$ subroutine d_oacc_to_dev(v)
-!!$ implicit none
-!!$ real(psb_dpk_) :: v(:)
-!!$ !$acc update device(v)
-!!$ end subroutine d_oacc_to_dev
-!!$
-!!$ subroutine d_oacc_to_host(v)
-!!$ implicit none
-!!$ real(psb_dpk_) :: v(:)
-!!$ !$acc update self(v)
-!!$ end subroutine d_oacc_to_host
-!!$
+
+
+ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ real(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, i
+
+ info = psb_success_
+
+ select type(xx => x)
+ type is (psb_d_vect_oacc)
+ if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(y%v)
+ if ((nx < m) .or. (ny < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
+ end do
+ end if
+ call y%set_dev()
+ class default
+ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
+ call y%axpby(m, alpha, x%v, beta, info)
+ end select
+ end subroutine d_oacc_axpby_v
+
+ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ real(psb_dpk_), intent(in) :: x(:)
+ class(psb_d_vect_oacc), intent(inout) :: y
+ real(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i
+
+ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * x(i) + beta * y%v(i)
+ end do
+ call y%set_host()
+ end subroutine d_oacc_axpby_a
+
+ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, nz, i
+ logical :: gpu_done
+
+ info = psb_success_
+ gpu_done = .false.
+
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type(yy => y)
+ class is (psb_d_vect_oacc)
+ select type(zz => z)
+ class is (psb_d_vect_oacc)
+ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
+ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
+ end select
+ end select
+ end select
+
+ if (.not. gpu_done) then
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
+ end if
+ end subroutine d_oacc_abgdxyz
+
+ subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ real(psb_dpk_) :: beta
+ class(psb_d_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine d_oacc_sctb_buf
+
+ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ real(psb_dpk_) :: beta, x(:)
+ class(psb_d_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'd_oacc_sctb_x')
+ return
+ end select
+
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+
+ call y%set_dev()
+ end subroutine d_oacc_sctb_x
+
+
+
+ subroutine d_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ real(psb_dpk_) :: beta, x(:)
+ class(psb_d_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine d_oacc_sctb
+
+
+ subroutine d_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_d_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'd_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine d_oacc_gthzbuf
+
+ subroutine d_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ real(psb_dpk_) :: y(:)
+ class(psb_d_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'd_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine d_oacc_gthzv_x
+
+ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_d_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_d_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space(info)
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ x%v(virl%v(i)) = vval%v(i)
+ end do
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space(info)
+ end select
+ select type(vval => val)
+ type is (psb_d_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine d_oacc_ins_v
+
+
+
+ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ real(psb_dpk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine d_oacc_ins_a
+
+
+
+ subroutine d_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine d_oacc_bld_mn
+
+
+ subroutine d_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ real(psb_dpk_), intent(in) :: this(:)
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'd_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine d_oacc_bld_x
+
+
+ subroutine d_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_d_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_d_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ end if
+ end subroutine d_oacc_asb_m
+
+
+
+ subroutine d_oacc_set_scal(x, val, first, last)
+ class(psb_d_vect_oacc), intent(inout) :: x
+ real(psb_dpk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine d_oacc_set_scal
+
+
+
+ subroutine d_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(dzero)
+ end subroutine d_oacc_zero
+
+ function d_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function d_oacc_get_nrows
+
+ function d_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "dOACC"
+
+ end function d_oacc_get_fmt
+
+ function d_oacc_vect_dot(n, x, y) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ real(psb_dpk_), external :: ddot
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ res = dzero
+
+ select type(yy => y)
+ type is (psb_d_base_vect_type)
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, x%v, 1, yy%v, 1)
+ type is (psb_d_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+
+ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
+ do i = 1, n
+ res = res + x%v(i) * yy%v(i)
+ end do
+ !$acc end parallel loop
+
+ class default
+ call x%sync()
+ res = y%dot(n, x%v)
+ end select
+
+ end function d_oacc_vect_dot
+
+
+
+
+ function d_oacc_dot_a(n, x, y) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ real(psb_dpk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ real(psb_dpk_), external :: ddot
+
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, y, 1, x%v, 1)
+
+ end function d_oacc_dot_a
+
+ ! subroutine d_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_d_vect_oacc), intent(inout) :: x
+ ! real(psb_dpk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine d_oacc_set_vect
+
+ subroutine d_oacc_to_dev(v)
+ implicit none
+ real(psb_dpk_) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_to_dev
+
+ subroutine d_oacc_to_host(v)
+ implicit none
+ real(psb_dpk_) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_to_host
+
subroutine d_oacc_sync_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -828,21 +849,21 @@ contains
subroutine d_oacc_set_host(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
-
+
x%state = is_host
end subroutine d_oacc_set_host
subroutine d_oacc_set_dev(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
-
+
x%state = is_dev
end subroutine d_oacc_set_dev
subroutine d_oacc_set_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
-
+
x%state = is_sync
end subroutine d_oacc_set_sync
@@ -869,50 +890,50 @@ contains
res = (x%state == is_sync)
end function d_oacc_is_sync
-!!$
-!!$ subroutine d_oacc_vect_all(n, x, info)
-!!$ use psi_serial_mod
-!!$ use psb_realloc_mod
-!!$ implicit none
-!!$ integer(psb_ipk_), intent(in) :: n
-!!$ class(psb_d_vect_oacc), intent(out) :: x
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ call psb_realloc(n, x%v, info)
-!!$ if (info == 0) then
-!!$ call x%set_host()
-!!$ !$acc enter data create(x%v)
-!!$ call x%sync_space()
-!!$ end if
-!!$ if (info /= 0) then
-!!$ info = psb_err_alloc_request_
-!!$ call psb_errpush(info, 'd_oacc_all', &
-!!$ i_err=(/n, n, n, n, n/))
-!!$ end if
-!!$ end subroutine d_oacc_vect_all
-!!$
-!!$
-!!$ subroutine d_oacc_vect_free(x, info)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ info = 0
-!!$ if (allocated(x%v)) then
-!!$ !$acc exit data delete(x%v) finalize
-!!$ deallocate(x%v, stat=info)
-!!$ end if
-!!$
-!!$ end subroutine d_oacc_vect_free
-!!$
-!!$ function oacc_get_size(x) result(res)
-!!$ implicit none
-!!$ class(psb_d_vect_oacc), intent(inout) :: x
-!!$ integer(psb_ipk_) :: res
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ res = size(x%v)
-!!$ end function oacc_get_size
-!!$
+
+ subroutine d_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_d_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'd_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine d_oacc_vect_all
+
+
+ subroutine d_oacc_vect_free(x, info)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine d_oacc_vect_free
+
+ function oacc_get_size(x) result(res)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function oacc_get_size
+
!!$
!!$ subroutine initialize(N)
!!$ integer(psb_ipk_) :: N
@@ -939,26 +960,6 @@ contains
!!$ subroutine to_host()
!!$ !$acc update self(v1,v2)
!!$ end subroutine to_host
-!!$ function d_dot(N) result(res)
-!!$ real(kind(1.d0)) :: res
-!!$ integer(psb_ipk_) :: i,N
-!!$ real(kind(1.d0)) :: t1,t2,t3
-!!$ res = 0.0d0
-!!$ !$acc parallel
-!!$ !$acc loop reduction(+:res)
-!!$ do i=1,N
-!!$ res = res + v1(i) * v2(i)
-!!$ end do
-!!$ !$acc end parallel
-!!$
-!!$ end function d_dot
-!!$ function h_dot(N) result(res)
-!!$ integer(psb_ipk_) :: i,N
-!!$ real(kind(1.d0)) :: t1,t2,t3,res
-!!$ res = 0.0d0
-!!$ do i=1,N
-!!$ res = res + v1(i) * v2(i)
-!!$ end do
-!!$ end function h_dot
!!$
+
end module psb_d_oacc_vect_mod
From a81d1d9b68481161c0102dcce390cf514978c063 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 9 Jul 2024 13:45:16 +0200
Subject: [PATCH 07/86] Now oacc_vect compiles cleanly
---
openacc/Makefile | 4 +-
openacc/impl/psb_d_oacc_mlt_v.f90 | 36 ++++++++---------
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 62 ++++++++++++++---------------
3 files changed, 50 insertions(+), 52 deletions(-)
diff --git a/openacc/Makefile b/openacc/Makefile
index c82a9281..aa6ea23d 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -20,7 +20,7 @@ CINCLUDES=
# Source files
FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
- psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o
+ psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
psb_oacc_env_mod.o
# Library name
@@ -43,7 +43,7 @@ ilib: $(OBJS)
$(MAKE) -C impl lib
psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
- psb_d_oacc_csr_mat_mod.o
+ psb_d_oacc_csr_mat_mod.o psb_oacc_env_mod.o
clean: cclean iclean
/bin/rm -f $(FOBJS) *$(.mod) *.a
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
index a4eb6660..bedd0247 100644
--- a/openacc/impl/psb_d_oacc_mlt_v.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -10,22 +10,22 @@ subroutine d_oacc_mlt_v(x, y, info)
integer(psb_ipk_) :: i, n
info = 0
-!!$ n = min(x%get_nrows(), y%get_nrows())
-!!$ select type(xx => x)
-!!$ type is (psb_d_base_vect_type)
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end select
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ if (y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
end subroutine d_oacc_mlt_v
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
index b59b4f56..7e46495f 100644
--- a/openacc/impl/psb_d_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -19,37 +19,35 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
-!!$ select type(xx => x)
-!!$ class is (psb_d_vect_oacc)
-!!$ select type (yy => y)
-!!$ class is (psb_d_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ if (yy%is_host()) call yy%sync_space()
-!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_dev()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync_space()
-!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ class default
-!!$ if (x%is_dev()) call x%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ if ((beta /= dzero) .and. (z%is_host())) call z%sync()
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ end do
+ call z%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (yy%is_dev()) call yy%sync()
+ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+ do i = 1, n
+ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end select
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+ do i = 1, n
+ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end select
end subroutine d_oacc_mlt_v_2
From c7bbfb8b680b8bfc25e3448e03de4d2ab57e8d0a Mon Sep 17 00:00:00 2001
From: tloloum
Date: Wed, 10 Jul 2024 10:06:27 +0200
Subject: [PATCH 08/86] new Makefile, test compile well + data vect
---
test/openacc/Makefile | 20 ++---
test/openacc/datavect.F90 | 84 ++++++++++++++++++
test/openacc/vectoacc.F90 | 180 ++++++++++++++++++++------------------
3 files changed, 187 insertions(+), 97 deletions(-)
create mode 100644 test/openacc/datavect.F90
diff --git a/test/openacc/Makefile b/test/openacc/Makefile
index 6df14d42..65bed1ad 100644
--- a/test/openacc/Makefile
+++ b/test/openacc/Makefile
@@ -1,7 +1,6 @@
TOPDIR=../..
include $(TOPDIR)/Make.inc
-# Directories
LIBDIR=$(TOPDIR)/lib/
PSBLIBDIR=$(TOPDIR)/lib/
PSBINCDIR=$(TOPDIR)/include
@@ -10,36 +9,30 @@ INCDIR=$(TOPDIR)/include
MODDIR=$(TOPDIR)/modules
EXEDIR=./runs
-# Libraries
-PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lpsb_openacc -lopenblas -lmetis
+PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lopenblas -lmetis
LDLIBS=$(PSBGPULDLIBS)
-# Includes
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
-# Compiler flags
FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70"
CFLAGS=-O0 -march=native
-# Source files
-SRCS=test.F90 vectoacc.F90
+SRCS=test.F90 vectoacc.F90 datavect.F90
CSRC=timers.c
-# Object files
OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o)
-# Default rule
-all: dir
+all: dir $(OBJS)
+ $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
+ /bin/mv datavect $(EXEDIR)
dir:
@if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi
-# Pattern rule for creating executables
%: %.o timers.o
$(FC) $(FFLAGS) $^ -o $@ $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
/bin/mv $@ $(EXEDIR)
-# Compilation rules
%.o: %.F90
$(FC) $(FFLAGS) $(FINCLUDES) -c $< -o $@
@@ -47,7 +40,6 @@ dir:
$(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@
clean:
- /bin/rm -fr *.o *.mod $(EXEDIR)/*
+ /bin/rm -fr *.o *.mod $(EXEDIR)/*
-# Phony targets
.PHONY: all dir clean
diff --git a/test/openacc/datavect.F90 b/test/openacc/datavect.F90
new file mode 100644
index 00000000..b78a65d9
--- /dev/null
+++ b/test/openacc/datavect.F90
@@ -0,0 +1,84 @@
+program datavect
+ use psb_base_mod
+ use psb_oacc_mod
+ implicit none
+
+ type(psb_d_vect_oacc) :: v3, v4, v5
+ integer(psb_ipk_) :: info, n, i, old_percentage, percentage
+ real(psb_dpk_) :: alpha, dot_dev, dot_host, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev
+ double precision, external :: etime
+ double precision :: time_start, time_end
+ integer, parameter :: min_size = 1000, max_size = 100000000, step_size = 1000000
+ integer, parameter :: ntests = 80, ngpu = 20
+ integer :: size
+ character(len=20) :: filename
+
+ open(unit=10, file='performance_data.csv', status='unknown')
+ write(10, '(A, A, A, A, A)') 'Size,Alloc_Host,Alloc_Dev,Calc_Host,Calc_Dev'
+
+ write(*, *) 'Test of the vector operations with OpenACC'
+
+ alpha = 2.0
+ old_percentage = 0
+
+ do size = min_size, max_size, step_size
+ n = size
+ percentage = int(real(size - min_size) / real(max_size - min_size) * 100.0)
+ if (percentage /= old_percentage) then
+ write(*, '(A,I3,A)', advance='no') 'Progress: ', percentage, '%'
+ write(*,'(A)', advance='no') char(13)
+ old_percentage = percentage
+ end if
+
+ time_start = etime()
+ call v3%all(n, info)
+ call v4%all(n, info)
+ call v5%all(n, info)
+ time_end = etime()
+ t_alloc_host = (time_end - time_start)
+
+ do i = 1, n
+ v3%v(i) = real(i, psb_dpk_)
+ v4%v(i) = real(n - i, psb_dpk_)
+ end do
+
+ call v3%scal(alpha)
+
+ call v3%set_host()
+ call v4%set_host()
+
+ time_start = etime()
+ do i = 1, ntests
+ dot_host = sum(v3%v * v4%v)
+ end do
+ time_end = etime()
+ t_calc_host = (time_end - time_start) / real(ntests)
+
+ time_start = etime()
+ call v3%set_dev()
+ call v4%set_dev()
+ call v3%sync_space()
+ call v4%sync_space()
+ time_end = etime()
+ t_alloc_dev = (time_end - time_start)
+
+ time_start = etime()
+ do i = 1, ntests
+ dot_dev = v3%dot_v(n, v4)
+ end do
+ !$acc wait
+ time_end = etime()
+ t_calc_dev = (time_end - time_start) / real(ntests)
+
+ write(10, '(I10, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5, 1X, ES12.5)') size, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev
+
+ call v3%free(info)
+ call v4%free(info)
+ call v5%free(info)
+ end do
+
+ close(10)
+ write(*, *) 'Performance data written to performance_data.csv'
+
+
+end program datavect
diff --git a/test/openacc/vectoacc.F90 b/test/openacc/vectoacc.F90
index b50fa317..639a2a67 100644
--- a/test/openacc/vectoacc.F90
+++ b/test/openacc/vectoacc.F90
@@ -1,85 +1,99 @@
program vectoacc
- use psb_base_mod
- use psb_oacc_mod
- implicit none
-
- type(psb_d_vect_oacc) :: v3, v4, v5
- integer(psb_ipk_) :: info, n, i
- real(psb_dpk_) :: alpha, beta, result
- double precision, external :: etime
-
- real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev
- double precision :: time_start, time_end
- integer(psb_ipk_), parameter :: ntests=80, ngpu=20
-
- write(*, *) 'Test of the vector operations with OpenACC'
-
- write(*, *) 'Enter the size of the vectors'
- read(*, *) n
- alpha = 2.0
- beta = 0.5
-
- call v3%all(n, info)
- call v4%all(n, info)
- call v5%all(n, info)
-
- do i = 1, n
- v3%v(i) = real(i, psb_dpk_)
- v4%v(i) = real(n - i, psb_dpk_)
- end do
-
- call v3%set_dev()
- call v4%set_dev()
-
- call v3%scal(alpha)
- call v3%sync()
-
- do i = 1, n
- if (v3%v(i) /= alpha * real(i, psb_dpk_)) then
- write(*, *) 'Scal error : index', i
- end if
- end do
- write(*, *) 'Scal test passed'
-
- result = v3%dot_v(n, v4)
- call v3%sync()
- call v4%sync()
- if (result /= sum(v3%v * v4%v)) then
- write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result
- end if
- write(*, *) 'Dot_v test passed'
-
- result = v3%nrm2(n)
- call v3%sync()
- if (result /= sqrt(sum(v3%v ** 2))) then
- write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result
- end if
- write(*, *) 'nrm2 test passed'
-
- call v3%set_host()
- call v4%set_host()
-
- time_start = etime()
- do i = 1, ntests
- dot_host = sum(v3%v * v4%v)
- end do
- time_end = etime()
- t_host = (time_end - time_start) / real(ntests)
- write(*, *) 'Performance host: ', t_host, ' sec'
-
- call v3%set_dev()
- call v4%set_dev()
- time_start = etime()
- do i = 1, ntests
- dot_dev = v3%dot_v(n, v4)
- end do
- !$acc wait
- time_end = etime()
- t_dev = (time_end - time_start) / real(ntests)
- write(*, *) 'Performance device: ', t_dev, ' sec'
-
- call v3%free(info)
- call v4%free(info)
- call v5%free(info)
-
+ use psb_base_mod
+ use psb_oacc_mod
+ implicit none
+
+ type(psb_d_vect_oacc) :: v3, v4, v5
+ integer(psb_ipk_) :: info, n, i
+ real(psb_dpk_) :: alpha, beta, result
+ double precision, external :: etime
+
+ real(psb_dpk_) :: dot_host, dot_dev, t_host, t_dev, t_alloc_host, t_alloc_dev, t_calc_host, t_calc_dev
+ double precision :: time_start, time_end
+ integer(psb_ipk_), parameter :: ntests=80, ngpu=20
+
+ write(*, *) 'Test of the vector operations with OpenACC'
+
+ write(*, *) 'Enter the size of the vectors'
+ read(*, *) n
+ alpha = 2.0
+ beta = 0.5
+
+ time_start = etime()
+ call v3%all(n, info)
+ call v4%all(n, info)
+ call v5%all(n, info)
+ time_end = etime()
+ t_alloc_host = time_end - time_start
+ write(*, *) 'Allocation time on host: ', t_alloc_host, ' sec'
+
+ do i = 1, n
+ v3%v(i) = real(i, psb_dpk_)
+ v4%v(i) = real(n - i, psb_dpk_)
+ end do
+
+ call v3%set_dev()
+ call v4%set_dev()
+
+ call v3%scal(alpha)
+ call v3%sync()
+
+ do i = 1, n
+ if (v3%v(i) /= alpha * real(i, psb_dpk_)) then
+ write(*, *) 'Scal error : index', i
+ end if
+ end do
+ write(*, *) 'Scal test passed'
+
+ result = v3%dot_v(n, v4)
+ call v3%sync()
+ call v4%sync()
+ if (result /= sum(v3%v * v4%v)) then
+ write(*, *) 'Dot_v error, expected result:', sum(v3%v * v4%v), 'instead of :', result
+ end if
+ write(*, *) 'Dot_v test passed'
+
+ result = v3%nrm2(n)
+ call v3%sync()
+ if (result /= sqrt(sum(v3%v ** 2))) then
+ write(*, *) 'nrm2 error, expected result:', sqrt(sum(v3%v ** 2)), 'instead of :', result
+ end if
+ write(*, *) 'nrm2 test passed'
+
+ call v3%set_host()
+ call v4%set_host()
+
+ time_start = etime()
+ do i = 1, ntests
+ dot_host = sum(v3%v * v4%v)
+ end do
+ time_end = etime()
+ t_calc_host = (time_end - time_start) / real(ntests)
+ write(*, *) 'Host calculation time: ', t_calc_host, ' sec'
+
+ call v3%set_dev()
+ call v4%set_dev()
+
+ time_start = etime()
+ call v3%sync_space()
+ call v4%sync_space()
+ time_end = etime()
+ t_alloc_dev = time_end - time_start
+ write(*, *) 'Allocation time on device: ', t_alloc_dev, ' sec'
+
+ time_start = etime()
+ do i = 1, ntests
+ dot_dev = v3%dot_v(n, v4)
+ end do
+ !$acc wait
+ time_end = etime()
+ t_calc_dev = (time_end - time_start) / real(ntests)
+ write(*, *) 'Device calculation time: ', t_calc_dev, ' sec'
+
+
+ call v3%free(info)
+ call v4%free(info)
+ call v5%free(info)
+
end program vectoacc
+
\ No newline at end of file
From 0aa5c9409b7de32fdd3a1e7d4ca4b3a090b33ad3 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 11 Jul 2024 12:12:42 +0200
Subject: [PATCH 09/86] Remove spurious pdf file.
---
docs/src/userguide.pdf | 1 -
1 file changed, 1 deletion(-)
delete mode 120000 docs/src/userguide.pdf
diff --git a/docs/src/userguide.pdf b/docs/src/userguide.pdf
deleted file mode 120000
index 7b032aa3..00000000
--- a/docs/src/userguide.pdf
+++ /dev/null
@@ -1 +0,0 @@
-tmp/userguide.pdf
\ No newline at end of file
From 2b5f09ddf9f746267c2e88530bbb708b89a532dd Mon Sep 17 00:00:00 2001
From: tloloum
Date: Fri, 12 Jul 2024 09:26:06 +0200
Subject: [PATCH 10/86] all methods implementations for
psb_d_oacc_csr_sparse_mat
---
openacc/Makefile | 10 +-
openacc/impl/Makefile | 8 +-
openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 34 +++
openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 25 +++
openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 23 +++
openacc/impl/psb_d_oacc_csr_csmm.F90 | 86 ++++++++
openacc/impl/psb_d_oacc_csr_csmv.F90 | 81 ++++++++
openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 84 ++++++++
openacc/impl/psb_d_oacc_csr_mold.F90 | 33 +++
openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 24 +++
openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 23 +++
openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 27 +++
openacc/impl/psb_d_oacc_csr_scal.F90 | 52 +++++
openacc/impl/psb_d_oacc_csr_scals.F90 | 32 +++
openacc/psb_d_oacc_csr_mat_mod.F90 | 194 +++++++++++++++++-
15 files changed, 719 insertions(+), 17 deletions(-)
create mode 100644 openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_csmm.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_csmv.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_mold.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_scal.F90
create mode 100644 openacc/impl/psb_d_oacc_csr_scals.F90
diff --git a/openacc/Makefile b/openacc/Makefile
index aa6ea23d..d1311fe2 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -1,34 +1,32 @@
include ../Make.inc
-# Compilers and flags
+
#CC=mpicc
#FC=mpif90
#FCOPT=-O0 -march=native
#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
-# Directories
LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
IMPLDIR=./impl
-# Include and library paths
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR)
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
CINCLUDES=
#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
-# Source files
+
FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
psb_oacc_env_mod.o
-# Library name
+
LIBNAME=libpsb_openacc.a
OBJS=$(COBJS) $(FOBJS)
-# Rules
+
lib: objs ilib
ar cur $(LIBNAME) $(OBJS)
/bin/cp -p $(LIBNAME) $(LIBDIR)
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index 7bab6654..ff0e8251 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -9,7 +9,13 @@ MODDIR=../../modules
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
LIBNAME=libpsb_openacc.a
-OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o
+OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
+ psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \
+ psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
+ psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
+ psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo \
+ psb_d_oacc_csr_mv_from_fmt psb_d_oacc_csr_mold.o \
+ psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o
objs: $(OBJS)
diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
new file mode 100644
index 00000000..a0b8abe0
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
@@ -0,0 +1,34 @@
+subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_allocate_mnnz
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_d_csr_sparse_mat%allocate(m, n, nz)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz))
+ allocate(a%ja(nz))
+ allocate(a%irp(m+1))
+ end if
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(err_act)
+ return
+
+end subroutine psb_d_oacc_csr_allocate_mnnz
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
new file mode 100644
index 00000000..c84c5876
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
@@ -0,0 +1,25 @@
+subroutine psb_d_oacc_csr_cp_from_coo(a, b, info)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_coo
+ implicit none
+
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+ 9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_d_oacc_csr_cp_from_coo
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
new file mode 100644
index 00000000..2eae41d7
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
@@ -0,0 +1,23 @@
+subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_fmt
+ implicit none
+
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_d_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+end subroutine psb_d_oacc_csr_cp_from_fmt
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90
new file mode 100644
index 00000000..e7224764
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_csmm.F90
@@ -0,0 +1,86 @@
+subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ use psb_base_mod
+ use elldev_mod
+ use psb_vectordev_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n,k, nxy
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_csr_csmm
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90
new file mode 100644
index 00000000..c3291acf
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_csmv.F90
@@ -0,0 +1,81 @@
+subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ use psb_base_mod
+ use elldev_mod
+ use psb_vectordev_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, n
+ do j = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_csr_csmv
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..c0461c82
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,84 @@
+subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ use psb_base_mod
+ use elldev_mod
+ use psb_vectordev_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv
+ use psb_d_oacc_vect_mod
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ real(psb_dpk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_csr_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_d_vect_oacc)
+ select type(yy => y)
+ type is (psb_d_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val)
+ yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_d_oacc_csr_inner_vect_sv
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90
new file mode 100644
index 00000000..08598cb6
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_mold.F90
@@ -0,0 +1,33 @@
+subroutine psb_d_oacc_csr_mold(a, b, info)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mold
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='csr_mold'
+ logical, parameter :: debug=.false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+ 9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_d_oacc_csr_mold
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
new file mode 100644
index 00000000..5f4a6b41
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
@@ -0,0 +1,24 @@
+subroutine psb_d_oacc_csr_mv_from_coo(a, b, info)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_coo
+ implicit none
+
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irp)
+
+ return
+
+ 9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+end subroutine psb_d_oacc_csr_mv_from_coo
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
new file mode 100644
index 00000000..16a4636b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
@@ -0,0 +1,23 @@
+subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_fmt
+ implicit none
+
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_d_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+end subroutine psb_d_oacc_csr_mv_from_fmt
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
new file mode 100644
index 00000000..7a6723d0
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
@@ -0,0 +1,27 @@
+subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_reallocate_nz
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='d_oacc_csr_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_d_csr_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_csr_reallocate_nz
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90
new file mode 100644
index 00000000..b2bf12e8
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_scal.F90
@@ -0,0 +1,52 @@
+subroutine psb_d_oacc_csr_scal(d, a, info, side)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scal
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_nrows()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_ncols()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop present(a, d)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d(i)
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+ 9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_csr_scal
+
\ No newline at end of file
diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90
new file mode 100644
index 00000000..8c77f647
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_csr_scals.F90
@@ -0,0 +1,32 @@
+subroutine psb_d_oacc_csr_scals(d, a, info)
+ use psb_base_mod
+ use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scals
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ !$acc parallel loop present(a)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+end subroutine psb_d_oacc_csr_scals
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index d9c8a2a6..b8eb1be9 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -12,18 +12,69 @@ module psb_d_oacc_csr_mat_mod
type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, pass(a) :: all => d_oacc_csr_all
- procedure, pass(a) :: is_host => d_oacc_csr_is_host
- procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
- procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
- procedure, pass(a) :: set_host => d_oacc_csr_set_host
- procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
- procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
- procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
- procedure, pass(a) :: sync => d_oacc_csr_sync
- procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
+ procedure, nopass :: get_fmt => d_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => d_oacc_csr_free
+ procedure, pass(a) :: mold => psb_d_oacc_csr_mold
+ procedure, pass(a) :: all => d_oacc_csr_all
+ procedure, pass(a) :: is_host => d_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => d_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
+ procedure, pass(a) :: sync => d_oacc_csr_sync
end type psb_d_oacc_csr_sparse_mat
+ interface
+ subroutine psb_d_oacc_csr_mold(a,b,info)
+ import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_mold
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info)
+ import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_cp_from_fmt
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_mv_from_coo(a,b,info)
+ import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_mv_from_coo
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info)
+ import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_mv_from_fmt
+ end interface
+
interface
subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
@@ -35,7 +86,130 @@ module psb_d_oacc_csr_mat_mod
end subroutine psb_d_oacc_csr_vect_mv
end interface
+ interface
+ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_inner_vect_sv
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_csmm
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_csmv
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_scals(d, a, info)
+ import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_scals
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_scal(d,a,info,side)
+ import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_d_oacc_csr_scal
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_reallocate_nz(nz,a)
+ import :: psb_d_oacc_csr_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_d_oacc_csr_reallocate_nz
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz)
+ import :: psb_d_oacc_csr_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_d_oacc_csr_allocate_mnnz
+ end interface
+
+ interface
+ subroutine psb_d_oacc_csr_cp_from_coo(a,b,info)
+ import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_cp_from_coo
+ end interface
+
contains
+
+
+ subroutine d_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp)
+ end if
+
+ call a%psb_d_csr_sparse_mat%free()
+
+ return
+ end subroutine d_oacc_csr_free
+
+
+
+ function d_oacc_csr_sizeof(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%irp)
+ res = res + psb_sizeof_ip * size(a%ja)
+
+ end function d_oacc_csr_sizeof
+
+
+ function d_oacc_csr_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'CSR_oacc'
+ end function d_oacc_csr_get_fmt
subroutine d_oacc_csr_all(m, n, nz, a, info)
implicit none
From 9e8294066d885c3fd7fa5e846b476b29c7e25418 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 12 Jul 2024 13:01:22 +0200
Subject: [PATCH 11/86] Fix Makefile
---
openacc/impl/Makefile | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index ff0e8251..64879eb6 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -13,8 +13,8 @@ OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \
psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
- psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo \
- psb_d_oacc_csr_mv_from_fmt psb_d_oacc_csr_mold.o \
+ psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
+ psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o \
psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o
objs: $(OBJS)
From 0707cc0a72fcb6d3282f8d3e353e21887eb46494 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 12 Jul 2024 13:01:32 +0200
Subject: [PATCH 12/86] Take out reference to elldev_mod
---
openacc/impl/psb_d_oacc_csr_csmm.F90 | 6 +++---
openacc/impl/psb_d_oacc_csr_csmv.F90 | 6 +++---
openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 6 +++---
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90
index e7224764..4b404ad1 100644
--- a/openacc/impl/psb_d_oacc_csr_csmm.F90
+++ b/openacc/impl/psb_d_oacc_csr_csmm.F90
@@ -1,7 +1,7 @@
subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
use psb_base_mod
- use elldev_mod
- use psb_vectordev_mod
+ !use elldev_mod
+ !use psb_vectordev_mod
use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
@@ -83,4 +83,4 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
return
end subroutine psb_d_oacc_csr_csmm
-
\ No newline at end of file
+
diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90
index c3291acf..65acc8e4 100644
--- a/openacc/impl/psb_d_oacc_csr_csmv.F90
+++ b/openacc/impl/psb_d_oacc_csr_csmv.F90
@@ -1,7 +1,7 @@
subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
use psb_base_mod
- use elldev_mod
- use psb_vectordev_mod
+ !use elldev_mod
+ !use psb_vectordev_mod
use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
@@ -78,4 +78,4 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
return
end subroutine psb_d_oacc_csr_csmv
-
\ No newline at end of file
+
diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
index c0461c82..a15a710e 100644
--- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
@@ -1,7 +1,7 @@
subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
use psb_base_mod
- use elldev_mod
- use psb_vectordev_mod
+ ! use elldev_mod
+ ! use psb_vectordev_mod
use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv
use psb_d_oacc_vect_mod
implicit none
@@ -81,4 +81,4 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
9999 call psb_error_handler(err_act)
return
end subroutine psb_d_oacc_csr_inner_vect_sv
-
\ No newline at end of file
+
From 7006665d823f9ca3efcbd929e933fe0cc956adc0 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 12 Jul 2024 16:34:48 +0200
Subject: [PATCH 13/86] Introduce submodules, adjust Makefile
---
openacc/impl/Makefile | 12 +
openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 27 +-
openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 25 +-
openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 21 +-
openacc/impl/psb_d_oacc_csr_csmm.F90 | 40 +-
openacc/impl/psb_d_oacc_csr_csmv.F90 | 40 +-
openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 33 +-
openacc/impl/psb_d_oacc_csr_mold.F90 | 24 +-
openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 27 +-
openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 21 +-
openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 23 +-
openacc/impl/psb_d_oacc_csr_scal.F90 | 27 +-
openacc/impl/psb_d_oacc_csr_scals.F90 | 46 +-
openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 66 +-
openacc/impl/psb_d_oacc_mlt_v.f90 | 57 +-
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 84 +--
openacc/psb_d_oacc_csr_mat_mod.F90 | 689 +++++++++---------
openacc/psb_d_oacc_vect_mod.F90 | 14 +-
18 files changed, 645 insertions(+), 631 deletions(-)
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index 64879eb6..56df9402 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -2,6 +2,7 @@ include ../../Make.inc
LIBDIR=../../lib
INCDIR=../../include
MODDIR=../../modules
+UP=..
#
# Compilers and such
#
@@ -22,6 +23,17 @@ objs: $(OBJS)
lib: objs
ar cur ../$(LIBNAME) $(OBJS)
+
+psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
+ psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \
+ psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
+ psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
+ psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
+ psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o
+
+psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
+
+
clean:
/bin/rm -f $(OBJS)
diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
index a0b8abe0..5281edee 100644
--- a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
+++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
@@ -1,6 +1,7 @@
-subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_allocate_mnnz
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz)
implicit none
integer(psb_ipk_), intent(in) :: m, n
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
@@ -9,26 +10,26 @@ subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz)
integer(psb_ipk_) :: err_act, nz_
character(len=20) :: name='allocate_mnz'
logical, parameter :: debug=.false.
-
+
call psb_erractionsave(err_act)
info = psb_success_
-
+
call a%psb_d_csr_sparse_mat%allocate(m, n, nz)
-
+
if (.not.allocated(a%val)) then
allocate(a%val(nz))
allocate(a%ja(nz))
allocate(a%irp(m+1))
end if
-
+
call a%set_dev()
if (info /= 0) goto 9999
-
+
call psb_erractionrestore(err_act)
return
-
- 9999 call psb_error_handler(err_act)
+
+9999 call psb_error_handler(err_act)
return
-
-end subroutine psb_d_oacc_csr_allocate_mnnz
-
\ No newline at end of file
+
+ end subroutine psb_d_oacc_csr_allocate_mnnz
+end submodule psb_d_oacc_csr_allocate_mnnz_impl
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
index c84c5876..ec92f618 100644
--- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
@@ -1,25 +1,26 @@
-subroutine psb_d_oacc_csr_cp_from_coo(a, b, info)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_coo
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_cp_from_coo(a, b, info)
implicit none
-
+
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
-
+
info = psb_success_
-
+
call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
-
+
call a%set_dev()
if (info /= 0) goto 9999
-
+
return
-
- 9999 continue
+
+9999 continue
info = psb_err_alloc_dealloc_
return
-
+
end subroutine psb_d_oacc_csr_cp_from_coo
-
\ No newline at end of file
+end submodule psb_d_oacc_csr_cp_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
index 2eae41d7..37541ea9 100644
--- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
@@ -1,23 +1,24 @@
-subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_fmt
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info)
implicit none
-
+
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
-
+
info = psb_success_
-
+
select type(b)
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
+
!$acc update device(a%val, a%ja, a%irp)
end select
-
-end subroutine psb_d_oacc_csr_cp_from_fmt
-
\ No newline at end of file
+
+ end subroutine psb_d_oacc_csr_cp_from_fmt
+end submodule psb_d_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90
index 4b404ad1..6ab87175 100644
--- a/openacc/impl/psb_d_oacc_csr_csmm.F90
+++ b/openacc/impl/psb_d_oacc_csr_csmm.F90
@@ -1,8 +1,7 @@
-subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- use psb_base_mod
- !use elldev_mod
- !use psb_vectordev_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta
@@ -10,30 +9,30 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
real(psb_dpk_), intent(inout) :: y(:,:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
-
+
character :: trans_
integer(psb_ipk_) :: i, j, m, n,k, nxy
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'd_oacc_csmm'
logical, parameter :: debug = .false.
-
+
info = psb_success_
call psb_erractionsave(err_act)
-
+
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
-
+
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
+
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
@@ -41,31 +40,31 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
n = a%get_ncols()
m = a%get_nrows()
end if
-
+
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
-
+
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
-
+
if (tra) then
call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
nxy = min(size(x,2), size(y,2))
-
+
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, m
y(i,j) = beta * y(i,j)
end do
end do
-
+
!$acc parallel loop collapse(2) present(a, x, y)
do j = 1, nxy
do i = 1, n
@@ -75,12 +74,13 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
end do
end do
endif
-
+
call psb_erractionrestore(err_act)
return
-
- 9999 call psb_error_handler(err_act)
+
+9999 call psb_error_handler(err_act)
return
-
+
end subroutine psb_d_oacc_csr_csmm
-
+end submodule psb_d_oacc_csr_csmm_impl
+
diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90
index 65acc8e4..205a7a59 100644
--- a/openacc/impl/psb_d_oacc_csr_csmv.F90
+++ b/openacc/impl/psb_d_oacc_csr_csmv.F90
@@ -1,8 +1,7 @@
-subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- use psb_base_mod
- !use elldev_mod
- !use psb_vectordev_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta
@@ -10,31 +9,31 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
real(psb_dpk_), intent(inout) :: y(:)
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
-
+
character :: trans_
integer(psb_ipk_) :: i, j, m, n
logical :: tra
integer(psb_ipk_) :: err_act
character(len=20) :: name = 'd_oacc_csmv'
logical, parameter :: debug = .false.
-
+
call psb_erractionsave(err_act)
info = psb_success_
-
+
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
-
+
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
-
+
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
+
if (tra) then
m = a%get_ncols()
n = a%get_nrows()
@@ -42,19 +41,19 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
n = a%get_ncols()
m = a%get_nrows()
end if
-
+
if (size(x,1) < n) then
info = 36
call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
goto 9999
end if
-
+
if (size(y,1) < m) then
info = 36
call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
goto 9999
end if
-
+
if (tra) then
call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
else
@@ -62,7 +61,7 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
do i = 1, m
y(i) = beta * y(i)
end do
-
+
!$acc parallel loop present(a, x, y)
do i = 1, n
do j = a%irp(i), a%irp(i+1) - 1
@@ -70,12 +69,13 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
end do
end do
endif
-
+
call psb_erractionrestore(err_act)
return
-
- 9999 call psb_error_handler(err_act)
+
+9999 call psb_error_handler(err_act)
return
-
+
end subroutine psb_d_oacc_csr_csmv
-
+end submodule psb_d_oacc_csr_csmv_impl
+
diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
index a15a710e..5f0ef4a0 100644
--- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
@@ -1,16 +1,14 @@
-subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
- use psb_base_mod
- ! use elldev_mod
- ! use psb_vectordev_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv
- use psb_d_oacc_vect_mod
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
-
+
real(psb_dpk_), allocatable :: rx(:), ry(:)
logical :: tra
character :: trans_
@@ -18,24 +16,24 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
character(len=20) :: name = 'd_oacc_csr_inner_vect_sv'
logical, parameter :: debug = .false.
integer(psb_ipk_) :: i
-
+
call psb_get_erraction(err_act)
info = psb_success_
-
+
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
-
+
if (.not.a%is_asb()) then
info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
-
+
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
+
if (tra .or. (beta /= dzero)) then
call x%sync()
call y%sync()
@@ -68,17 +66,18 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
call y%bld(ry)
end select
endif
-
+
if (info /= psb_success_) then
info = psb_err_from_subroutine_
call psb_errpush(info, name, a_err = 'csrg_vect_sv')
goto 9999
endif
-
+
call psb_erractionrestore(err_act)
return
-
- 9999 call psb_error_handler(err_act)
+
+9999 call psb_error_handler(err_act)
return
end subroutine psb_d_oacc_csr_inner_vect_sv
-
+end submodule psb_d_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90
index 08598cb6..dc9ff711 100644
--- a/openacc/impl/psb_d_oacc_csr_mold.F90
+++ b/openacc/impl/psb_d_oacc_csr_mold.F90
@@ -1,6 +1,7 @@
-subroutine psb_d_oacc_csr_mold(a, b, info)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mold
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_mold(a, b, info)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
@@ -8,26 +9,27 @@ subroutine psb_d_oacc_csr_mold(a, b, info)
integer(psb_ipk_) :: err_act
character(len=20) :: name='csr_mold'
logical, parameter :: debug=.false.
-
+
call psb_get_erraction(err_act)
-
+
info = 0
if (allocated(b)) then
call b%free()
deallocate(b, stat=info)
end if
if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info)
-
+
if (info /= psb_success_) then
info = psb_err_alloc_dealloc_
call psb_errpush(info, name)
goto 9999
end if
return
-
- 9999 call psb_error_handler(err_act)
-
+
+9999 call psb_error_handler(err_act)
+
return
-
+
end subroutine psb_d_oacc_csr_mold
-
\ No newline at end of file
+end submodule psb_d_oacc_csr_mold_impl
+
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
index 5f4a6b41..2ed9b032 100644
--- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
@@ -1,24 +1,25 @@
-subroutine psb_d_oacc_csr_mv_from_coo(a, b, info)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_coo
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_mv_from_coo(a, b, info)
implicit none
-
+
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
class(psb_d_coo_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
-
+
info = psb_success_
-
+
call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
+
!$acc update device(a%val, a%ja, a%irp)
-
+
return
-
- 9999 continue
+
+9999 continue
info = psb_err_alloc_dealloc_
return
-
-end subroutine psb_d_oacc_csr_mv_from_coo
-
\ No newline at end of file
+
+ end subroutine psb_d_oacc_csr_mv_from_coo
+end submodule psb_d_oacc_csr_mv_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
index 16a4636b..197ed911 100644
--- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
@@ -1,23 +1,24 @@
-subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_fmt
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info)
implicit none
-
+
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
class(psb_d_base_sparse_mat), intent(inout) :: b
integer(psb_ipk_), intent(out) :: info
-
+
info = psb_success_
-
+
select type(b)
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
+
!$acc update device(a%val, a%ja, a%irp)
end select
-
-end subroutine psb_d_oacc_csr_mv_from_fmt
-
\ No newline at end of file
+
+ end subroutine psb_d_oacc_csr_mv_from_fmt
+end submodule psb_d_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
index 7a6723d0..c345f681 100644
--- a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
+++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
@@ -1,6 +1,7 @@
-subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_reallocate_nz
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
implicit none
integer(psb_ipk_), intent(in) :: nz
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
@@ -8,20 +9,20 @@ subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
integer(psb_ipk_) :: err_act
character(len=20) :: name='d_oacc_csr_reallocate_nz'
logical, parameter :: debug=.false.
-
+
call psb_erractionsave(err_act)
info = psb_success_
-
+
call a%psb_d_csr_sparse_mat%reallocate(nz)
-
+
call a%set_dev()
if (info /= 0) goto 9999
-
+
call psb_erractionrestore(err_act)
return
-
- 9999 call psb_error_handler(err_act)
+
+9999 call psb_error_handler(err_act)
return
-
+
end subroutine psb_d_oacc_csr_reallocate_nz
-
\ No newline at end of file
+end submodule psb_d_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90
index b2bf12e8..cc693fa2 100644
--- a/openacc/impl/psb_d_oacc_csr_scal.F90
+++ b/openacc/impl/psb_d_oacc_csr_scal.F90
@@ -1,22 +1,23 @@
-subroutine psb_d_oacc_csr_scal(d, a, info, side)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scal
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_scal(d, a, info, side)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
real(psb_dpk_), intent(in) :: d(:)
integer(psb_ipk_), intent(out) :: info
character, intent(in), optional :: side
-
+
integer(psb_ipk_) :: err_act
character(len=20) :: name='scal'
logical, parameter :: debug=.false.
integer(psb_ipk_) :: i, j
-
+
info = psb_success_
call psb_erractionsave(err_act)
-
+
if (a%is_host()) call a%sync()
-
+
if (present(side)) then
if (side == 'L') then
!$acc parallel loop present(a, d)
@@ -39,14 +40,14 @@ subroutine psb_d_oacc_csr_scal(d, a, info, side)
a%val(i) = a%val(i) * d(i)
end do
end if
-
+
call a%set_dev()
-
+
call psb_erractionrestore(err_act)
return
-
- 9999 call psb_error_handler(err_act)
+
+9999 call psb_error_handler(err_act)
return
-
+
end subroutine psb_d_oacc_csr_scal
-
\ No newline at end of file
+end submodule psb_d_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90
index 8c77f647..157355d8 100644
--- a/openacc/impl/psb_d_oacc_csr_scals.F90
+++ b/openacc/impl/psb_d_oacc_csr_scals.F90
@@ -1,32 +1,34 @@
-subroutine psb_d_oacc_csr_scals(d, a, info)
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scals_impl
use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scals
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d
- integer(psb_ipk_), intent(out) :: info
+contains
+ module subroutine psb_d_oacc_csr_scals(d, a, info)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: err_act
- character(len=20) :: name='scal'
- logical, parameter :: debug=.false.
- integer(psb_ipk_) :: i
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i
- info = psb_success_
- call psb_erractionsave(err_act)
+ info = psb_success_
+ call psb_erractionsave(err_act)
- if (a%is_host()) call a%sync()
+ if (a%is_host()) call a%sync()
- !$acc parallel loop present(a)
- do i = 1, size(a%val)
- a%val(i) = a%val(i) * d
- end do
+ !$acc parallel loop present(a)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d
+ end do
- call a%set_dev()
+ call a%set_dev()
- call psb_erractionrestore(err_act)
- return
+ call psb_erractionrestore(err_act)
+ return
9999 call psb_error_handler(err_act)
- return
+ return
-end subroutine psb_d_oacc_csr_scals
+ end subroutine psb_d_oacc_csr_scals
+end submodule psb_d_oacc_csr_scals_impl
diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
index f0394591..1dca2ba2 100644
--- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
@@ -1,6 +1,7 @@
-subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
- use psb_base_mod
- use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_vect_mv
+submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
@@ -16,9 +17,9 @@ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
n = a%get_ncols()
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
- info = psb_err_invalid_mat_state_
- return
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
end if
if (a%is_host()) call a%sync()
@@ -28,34 +29,35 @@ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
call y%set_dev()
-contains
+ contains
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
- implicit none
- integer(psb_ipk_) :: m, n
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_) :: val(:), x(:), y(:)
- integer(psb_ipk_) :: ja(:), irp(:)
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, ii, isz
- real(psb_dpk_) :: tmp
- integer(psb_ipk_), parameter :: vsz = 256
-
- info = 0
-
- !$acc parallel loop vector_length(vsz) private(isz)
- do ii = 1, m, vsz
- isz = min(vsz, m - ii + 1)
- !$acc loop independent private(tmp)
- do i = ii, ii + isz - 1
- tmp = 0.0_psb_dpk_
- !$acc loop seq
- do j = irp(i), irp(i + 1) - 1
- tmp = tmp + val(j) * x(ja(j))
- end do
- y(i) = alpha * tmp + beta * y(i)
- end do
+ implicit none
+ integer(psb_ipk_) :: m, n
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), irp(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ real(psb_dpk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = irp(i), irp(i + 1) - 1
+ tmp = tmp + val(j) * x(ja(j))
+ end do
+ y(i) = alpha * tmp + beta * y(i)
end do
+ end do
end subroutine inner_spmv
-end subroutine psb_d_oacc_csr_vect_mv
+ end subroutine psb_d_oacc_csr_vect_mv
+end submodule psb_d_oacc_csr_vect_mv_impl
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
index bedd0247..ab242b57 100644
--- a/openacc/impl/psb_d_oacc_mlt_v.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -1,31 +1,34 @@
+submodule (psb_d_oacc_vect_mod) psb_d_oacc_mlt_v_impl
+ use psb_string_mod
+contains
-subroutine d_oacc_mlt_v(x, y, info)
- use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v
+ module subroutine psb_d_oacc_mlt_v(x, y, info)
- implicit none
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_vect_oacc), intent(inout) :: y
- integer(psb_ipk_), intent(out) :: info
+ implicit none
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, n
+ integer(psb_ipk_) :: i, n
- info = 0
- n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
- class is (psb_d_vect_oacc)
- if (y%is_host()) call y%sync()
- if (xx%is_host()) call xx%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
- call y%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (y%is_dev()) call y%sync()
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
- call y%set_host()
- end select
-end subroutine d_oacc_mlt_v
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ if (y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
+ end subroutine psb_d_oacc_mlt_v
+end submodule psb_d_oacc_mlt_v_impl
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
index 7e46495f..4ca2bdab 100644
--- a/openacc/impl/psb_d_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -1,53 +1,55 @@
-subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2
+submodule (psb_d_oacc_vect_mod) d_oacc_mlt_v_2_impl
use psb_string_mod
- implicit none
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_base_vect_type), intent(inout) :: y
- class(psb_d_vect_oacc), intent(inout) :: z
- integer(psb_ipk_), intent(out) :: info
- character(len=1), intent(in), optional :: conjgx, conjgy
- integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
+contains
+ module subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
- conjgx_ = .false.
- conjgy_ = .false.
- if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
- if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
- n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
- info = 0
- select type(xx => x)
- class is (psb_d_vect_oacc)
- select type (yy => y)
+ info = 0
+ select type(xx => x)
class is (psb_d_vect_oacc)
- if (xx%is_host()) call xx%sync()
- if (yy%is_host()) call yy%sync()
- if ((beta /= dzero) .and. (z%is_host())) call z%sync()
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
- end do
- call z%set_dev()
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ if ((beta /= dzero) .and. (z%is_host())) call z%sync()
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ end do
+ call z%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (yy%is_dev()) call yy%sync()
+ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+ do i = 1, n
+ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end select
class default
- if (xx%is_dev()) call xx%sync()
- if (yy%is_dev()) call yy%sync()
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
do i = 1, n
- z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
end do
call z%set_host()
end select
- class default
- if (x%is_dev()) call x%sync()
- if (y%is_dev()) call y%sync()
- if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
- do i = 1, n
- z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
- end do
- call z%set_host()
- end select
-end subroutine d_oacc_mlt_v_2
+ end subroutine d_oacc_mlt_v_2
+end submodule d_oacc_mlt_v_2_impl
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index b8eb1be9..ca4199a8 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -1,359 +1,346 @@
module psb_d_oacc_csr_mat_mod
- use iso_c_binding
- use psb_d_mat_mod
- use psb_d_oacc_vect_mod
- !use oaccsparse_mod
-
- integer(psb_ipk_), parameter, private :: is_host = -1
- integer(psb_ipk_), parameter, private :: is_sync = 0
- integer(psb_ipk_), parameter, private :: is_dev = 1
-
- type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
- integer(psb_ipk_) :: devstate = is_host
- contains
- procedure, nopass :: get_fmt => d_oacc_csr_get_fmt
- procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
- procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv
- procedure, pass(a) :: scals => psb_d_oacc_csr_scals
- procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
- procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt
- procedure, pass(a) :: free => d_oacc_csr_free
- procedure, pass(a) :: mold => psb_d_oacc_csr_mold
- procedure, pass(a) :: all => d_oacc_csr_all
- procedure, pass(a) :: is_host => d_oacc_csr_is_host
- procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
- procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
- procedure, pass(a) :: set_host => d_oacc_csr_set_host
- procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
- procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
- procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
- procedure, pass(a) :: sync => d_oacc_csr_sync
- end type psb_d_oacc_csr_sparse_mat
-
- interface
- subroutine psb_d_oacc_csr_mold(a,b,info)
- import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_csr_mold
- end interface
-
- interface
- subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info)
- import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- class(psb_d_base_sparse_mat), intent(in) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_csr_cp_from_fmt
- end interface
-
- interface
- subroutine psb_d_oacc_csr_mv_from_coo(a,b,info)
- import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(inout) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_csr_mv_from_coo
- end interface
-
- interface
- subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info)
- import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- class(psb_d_base_sparse_mat), intent(inout) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_csr_mv_from_fmt
- end interface
-
- interface
- subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
- import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x, y
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_csr_vect_mv
- end interface
-
- interface
- subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
- import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x,y
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_csr_inner_vect_sv
- end interface
-
- interface
- subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_csr_csmm
- end interface
-
- interface
- subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_csr_csmv
- end interface
-
- interface
- subroutine psb_d_oacc_csr_scals(d, a, info)
- import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_csr_scals
- end interface
-
- interface
- subroutine psb_d_oacc_csr_scal(d,a,info,side)
- import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: side
- end subroutine psb_d_oacc_csr_scal
- end interface
-
- interface
- subroutine psb_d_oacc_csr_reallocate_nz(nz,a)
- import :: psb_d_oacc_csr_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- integer(psb_ipk_), intent(in) :: nz
- end subroutine psb_d_oacc_csr_reallocate_nz
- end interface
-
- interface
- subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz)
- import :: psb_d_oacc_csr_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- integer(psb_ipk_), intent(in) :: m,n
- integer(psb_ipk_), intent(in), optional :: nz
- end subroutine psb_d_oacc_csr_allocate_mnnz
- end interface
-
- interface
- subroutine psb_d_oacc_csr_cp_from_coo(a,b,info)
- import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(in) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_csr_cp_from_coo
- end interface
+ use iso_c_binding
+ use psb_d_mat_mod
+ use psb_d_oacc_vect_mod
+ !use oaccsparse_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => d_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => d_oacc_csr_free
+ procedure, pass(a) :: mold => psb_d_oacc_csr_mold
+ procedure, pass(a) :: all => d_oacc_csr_all
+ procedure, pass(a) :: is_host => d_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => d_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
+ procedure, pass(a) :: sync => d_oacc_csr_sync
+ end type psb_d_oacc_csr_sparse_mat
+
+ interface
+ module subroutine psb_d_oacc_csr_mold(a,b,info)
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_mold
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_mv_from_coo(a,b,info)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_csmm
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_csr_csmv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_scals(d, a, info)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_scals
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_scal(d,a,info,side)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_d_oacc_csr_scal
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_reallocate_nz(nz,a)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_d_oacc_csr_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_d_oacc_csr_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_csr_cp_from_coo(a,b,info)
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_csr_cp_from_coo
+ end interface
contains
-
-
- subroutine d_oacc_csr_free(a)
- use psb_base_mod
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- integer(psb_ipk_) :: info
-
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp)
- end if
-
- call a%psb_d_csr_sparse_mat%free()
-
- return
- end subroutine d_oacc_csr_free
-
-
- function d_oacc_csr_sizeof(a) result(res)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- integer(psb_epk_) :: res
- if (a%is_dev()) call a%sync()
-
- res = 8
- res = res + psb_sizeof_dp * size(a%val)
- res = res + psb_sizeof_ip * size(a%irp)
- res = res + psb_sizeof_ip * size(a%ja)
-
- end function d_oacc_csr_sizeof
-
-
- function d_oacc_csr_get_fmt() result(res)
- implicit none
- character(len=5) :: res
- res = 'CSR_oacc'
- end function d_oacc_csr_get_fmt
-
- subroutine d_oacc_csr_all(m, n, nz, a, info)
- implicit none
- integer(psb_ipk_), intent(in) :: m, n, nz
- class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
- integer(psb_ipk_), intent(out) :: info
-
- info = 0
- if (allocated(a%val)) then
- !$acc exit data delete(a%val) finalize
- deallocate(a%val, stat=info)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja) finalize
- deallocate(a%ja, stat=info)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp) finalize
- deallocate(a%irp, stat=info)
- end if
-
- call a%set_nrows(m)
- call a%set_ncols(n)
-
- allocate(a%val(nz),stat=info)
- allocate(a%ja(nz),stat=info)
- allocate(a%irp(m+1),stat=info)
- if (info == 0) call a%set_host()
- if (info == 0) call a%sync_space()
- end subroutine d_oacc_csr_all
-
- function d_oacc_csr_is_host(a) result(res)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_host)
- end function d_oacc_csr_is_host
-
- function d_oacc_csr_is_sync(a) result(res)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_sync)
- end function d_oacc_csr_is_sync
-
- function d_oacc_csr_is_dev(a) result(res)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_dev)
- end function d_oacc_csr_is_dev
-
- subroutine d_oacc_csr_set_host(a)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
-
- a%devstate = is_host
- end subroutine d_oacc_csr_set_host
-
- subroutine d_oacc_csr_set_sync(a)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
-
- a%devstate = is_sync
- end subroutine d_oacc_csr_set_sync
-
- subroutine d_oacc_csr_set_dev(a)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
-
- a%devstate = is_dev
- end subroutine d_oacc_csr_set_dev
-
- subroutine d_oacc_csr_sync_space(a)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call d_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irp)) then
- call i_oacc_create_dev(a%irp)
- end if
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
- end subroutine d_oacc_csr_sync_space
-
- subroutine d_oacc_csr_sync(a)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a
- class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa
- integer(psb_ipk_) :: info
-
- tmpa => a
- if (a%is_dev()) then
- call d_oacc_csr_to_host(a%val)
- call i_oacc_csr_to_host(a%ja)
- call i_oacc_csr_to_host(a%irp)
- else if (a%is_host()) then
- call d_oacc_csr_to_dev(a%val)
- call i_oacc_csr_to_dev(a%ja)
- call i_oacc_csr_to_dev(a%irp)
- end if
- call tmpa%set_sync()
- end subroutine d_oacc_csr_sync
-
- subroutine d_oacc_csr_to_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_csr_to_dev
-
- subroutine d_oacc_csr_to_host(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_csr_to_host
-
- subroutine i_oacc_csr_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_csr_to_dev
-
- subroutine i_oacc_csr_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_csr_to_host
-
-
+ subroutine d_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp)
+ end if
+
+ call a%psb_d_csr_sparse_mat%free()
+
+ return
+ end subroutine d_oacc_csr_free
+
+
+
+ function d_oacc_csr_sizeof(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%irp)
+ res = res + psb_sizeof_ip * size(a%ja)
+
+ end function d_oacc_csr_sizeof
+
+
+ function d_oacc_csr_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'CSR_oacc'
+ end function d_oacc_csr_get_fmt
+
+ subroutine d_oacc_csr_all(m, n, nz, a, info)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n, nz
+ class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val) finalize
+ deallocate(a%val, stat=info)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja) finalize
+ deallocate(a%ja, stat=info)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp) finalize
+ deallocate(a%irp, stat=info)
+ end if
+
+ call a%set_nrows(m)
+ call a%set_ncols(n)
+
+ allocate(a%val(nz),stat=info)
+ allocate(a%ja(nz),stat=info)
+ allocate(a%irp(m+1),stat=info)
+ if (info == 0) call a%set_host()
+ if (info == 0) call a%sync_space()
+ end subroutine d_oacc_csr_all
+
+ function d_oacc_csr_is_host(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function d_oacc_csr_is_host
+
+ function d_oacc_csr_is_sync(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function d_oacc_csr_is_sync
+
+ function d_oacc_csr_is_dev(a) result(res)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function d_oacc_csr_is_dev
+
+ subroutine d_oacc_csr_set_host(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine d_oacc_csr_set_host
+
+ subroutine d_oacc_csr_set_sync(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine d_oacc_csr_set_sync
+
+ subroutine d_oacc_csr_set_dev(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine d_oacc_csr_set_dev
+
+ subroutine d_oacc_csr_sync_space(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ if (allocated(a%val)) then
+ call d_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ call i_oacc_create_dev(a%irp)
+ end if
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+ end subroutine d_oacc_csr_sync_space
+
+ subroutine d_oacc_csr_sync(a)
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a
+ class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call d_oacc_csr_to_host(a%val)
+ call i_oacc_csr_to_host(a%ja)
+ call i_oacc_csr_to_host(a%irp)
+ else if (a%is_host()) then
+ call d_oacc_csr_to_dev(a%val)
+ call i_oacc_csr_to_dev(a%ja)
+ call i_oacc_csr_to_dev(a%irp)
+ end if
+ call tmpa%set_sync()
+ end subroutine d_oacc_csr_sync
+
+ subroutine d_oacc_csr_to_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_csr_to_dev
+
+ subroutine d_oacc_csr_to_host(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_csr_to_host
+
+ subroutine i_oacc_csr_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_csr_to_dev
+
+ subroutine i_oacc_csr_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_csr_to_host
+
+
end module psb_d_oacc_csr_mat_mod
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 3139f9b8..3385f1ec 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -49,8 +49,8 @@ module psb_d_oacc_vect_mod
procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => d_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
+ procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2
procedure, pass(x) :: scal => d_oacc_scal
procedure, pass(x) :: nrm2 => d_oacc_nrm2
procedure, pass(x) :: amax => d_oacc_amax
@@ -63,19 +63,17 @@ module psb_d_oacc_vect_mod
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
interface
- subroutine d_oacc_mlt_v(x, y, info)
- import
+ module subroutine psb_d_oacc_mlt_v(x, y, info)
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
- end subroutine d_oacc_mlt_v
+ end subroutine psb_d_oacc_mlt_v
end interface
interface
- subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- import
+ module subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x
@@ -83,7 +81,7 @@ module psb_d_oacc_vect_mod
class(psb_d_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
- end subroutine d_oacc_mlt_v_2
+ end subroutine psb_d_oacc_mlt_v_2
end interface
contains
From b6fe0f3344727bfdad1c44d85d511d8796fcd70e Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 17 Jul 2024 08:52:43 +0200
Subject: [PATCH 14/86] New version for modules and methods
---
openacc/Makefile | 28 +-
openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 | 35 +
openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 | 26 +
openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 | 24 +
openacc/impl/psb_c_oacc_csr_csmm.F90 | 86 ++
openacc/impl/psb_c_oacc_csr_csmv.F90 | 81 ++
openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 | 83 ++
openacc/impl/psb_c_oacc_csr_mold.F90 | 35 +
openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 | 25 +
openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 | 24 +
openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 | 28 +
openacc/impl/psb_c_oacc_csr_scal.F90 | 53 +
openacc/impl/psb_c_oacc_csr_scals.F90 | 34 +
openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 63 ++
openacc/impl/psb_c_oacc_mlt_v.f90 | 31 +
openacc/impl/psb_c_oacc_mlt_v_2.f90 | 98 ++
openacc/impl/psb_d_oacc_mlt_v.f90 | 57 +-
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 125 ++-
openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 | 35 +
openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 | 26 +
openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 | 24 +
openacc/impl/psb_s_oacc_csr_csmm.F90 | 86 ++
openacc/impl/psb_s_oacc_csr_csmv.F90 | 81 ++
openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 | 83 ++
openacc/impl/psb_s_oacc_csr_mold.F90 | 35 +
openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 | 25 +
openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 | 24 +
openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 | 28 +
openacc/impl/psb_s_oacc_csr_scal.F90 | 53 +
openacc/impl/psb_s_oacc_csr_scals.F90 | 34 +
openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 63 ++
openacc/impl/psb_s_oacc_mlt_v.f90 | 31 +
openacc/impl/psb_s_oacc_mlt_v_2.f90 | 98 ++
openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 | 35 +
openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 | 26 +
openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 | 24 +
openacc/impl/psb_z_oacc_csr_csmm.F90 | 86 ++
openacc/impl/psb_z_oacc_csr_csmv.F90 | 81 ++
openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 | 83 ++
openacc/impl/psb_z_oacc_csr_mold.F90 | 35 +
openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 | 25 +
openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 | 24 +
openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 | 28 +
openacc/impl/psb_z_oacc_csr_scal.F90 | 53 +
openacc/impl/psb_z_oacc_csr_scals.F90 | 34 +
openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 63 ++
openacc/impl/psb_z_oacc_mlt_v.f90 | 31 +
openacc/impl/psb_z_oacc_mlt_v_2.f90 | 98 ++
openacc/psb_c_oacc_csr_mat_mod.F90 | 343 +++++++
openacc/psb_c_oacc_vect_mod.F90 | 935 ++++++++++++++++++
openacc/psb_d_oacc_csr_mat_mod.F90 | 3 -
openacc/psb_d_oacc_vect_mod.F90 | 54 +-
openacc/psb_s_oacc_csr_mat_mod.F90 | 343 +++++++
openacc/psb_s_oacc_vect_mod.F90 | 935 ++++++++++++++++++
openacc/psb_z_oacc_csr_mat_mod.F90 | 343 +++++++
openacc/psb_z_oacc_vect_mod.F90 | 935 ++++++++++++++++++
56 files changed, 6159 insertions(+), 120 deletions(-)
create mode 100644 openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_csmm.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_csmv.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_mold.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_reallocate_nz.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_scal.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_scals.F90
create mode 100644 openacc/impl/psb_c_oacc_csr_vect_mv.F90
create mode 100644 openacc/impl/psb_c_oacc_mlt_v.f90
create mode 100644 openacc/impl/psb_c_oacc_mlt_v_2.f90
create mode 100644 openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_csmm.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_csmv.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_mold.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_scal.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_scals.F90
create mode 100644 openacc/impl/psb_s_oacc_csr_vect_mv.F90
create mode 100644 openacc/impl/psb_s_oacc_mlt_v.f90
create mode 100644 openacc/impl/psb_s_oacc_mlt_v_2.f90
create mode 100644 openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_csmm.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_csmv.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_mold.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_reallocate_nz.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_scal.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_scals.F90
create mode 100644 openacc/impl/psb_z_oacc_csr_vect_mv.F90
create mode 100644 openacc/impl/psb_z_oacc_mlt_v.f90
create mode 100644 openacc/impl/psb_z_oacc_mlt_v_2.f90
create mode 100644 openacc/psb_c_oacc_csr_mat_mod.F90
create mode 100644 openacc/psb_c_oacc_vect_mod.F90
create mode 100644 openacc/psb_s_oacc_csr_mat_mod.F90
create mode 100644 openacc/psb_s_oacc_vect_mod.F90
create mode 100644 openacc/psb_z_oacc_csr_mat_mod.F90
create mode 100644 openacc/psb_z_oacc_vect_mod.F90
diff --git a/openacc/Makefile b/openacc/Makefile
index d1311fe2..3a752ac7 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -17,9 +17,12 @@ CINCLUDES=
#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
-FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
- psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
- psb_oacc_env_mod.o
+FOBJS= psb_i_oacc_vect_mod.o \
+ psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
+ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
+ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
+ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
+ psb_oacc_mod.o psb_oacc_env_mod.o
LIBNAME=libpsb_openacc.a
@@ -40,8 +43,23 @@ iobjs: $(OBJS)
ilib: $(OBJS)
$(MAKE) -C impl lib
-psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
- psb_d_oacc_csr_mat_mod.o psb_oacc_env_mod.o
+psb_oacc_mod.o : psb_i_oacc_vect_mod.o \
+ psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
+ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
+ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
+ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
+ psb_oacc_env_mod.o
+
+psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
+ psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o : psb_i_oacc_vect_mod.o
+
+
+psb_s_oacc_csr_mat_mod.o: psb_s_oacc_vect_mod.o
+psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o
+psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o
+psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o
+
+
clean: cclean iclean
/bin/rm -f $(FOBJS) *$(.mod) *.a
diff --git a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
new file mode 100644
index 00000000..09cdc228
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
@@ -0,0 +1,35 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_c_csr_sparse_mat%allocate(m, n, nz)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz))
+ allocate(a%ja(nz))
+ allocate(a%irp(m+1))
+ end if
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_csr_allocate_mnnz
+end submodule psb_c_oacc_csr_allocate_mnnz_impl
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
new file mode 100644
index 00000000..70380c95
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
@@ -0,0 +1,26 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_c_oacc_csr_cp_from_coo
+end submodule psb_c_oacc_csr_cp_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
new file mode 100644
index 00000000..7e664791
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_c_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+ end subroutine psb_c_oacc_csr_cp_from_fmt
+end submodule psb_c_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_csr_csmm.F90 b/openacc/impl/psb_c_oacc_csr_csmm.F90
new file mode 100644
index 00000000..c26df410
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_), intent(in) :: x(:,:)
+ complex(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n,k, nxy
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'c_oacc_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalic_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_csr_csmm
+end submodule psb_c_oacc_csr_csmm_impl
+
diff --git a/openacc/impl/psb_c_oacc_csr_csmv.F90 b/openacc/impl/psb_c_oacc_csr_csmv.F90
new file mode 100644
index 00000000..8f37efb3
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_csmv.F90
@@ -0,0 +1,81 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_), intent(in) :: x(:)
+ complex(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'c_oacc_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalic_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, n
+ do j = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_csr_csmv
+end submodule psb_c_oacc_csr_csmv_impl
+
diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..2d733f48
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,83 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ complex(psb_spk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'c_oacc_csr_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalic_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_c_vect_oacc)
+ select type(yy => y)
+ type is (psb_c_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val)
+ yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_c_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_c_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_c_oacc_csr_inner_vect_sv
+end submodule psb_c_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_c_oacc_csr_mold.F90 b/openacc/impl/psb_c_oacc_csr_mold.F90
new file mode 100644
index 00000000..6ee36985
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_mold.F90
@@ -0,0 +1,35 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_mold(a, b, info)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='csr_mold'
+ logical, parameter :: debug=.false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_c_oacc_csr_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_c_oacc_csr_mold
+end submodule psb_c_oacc_csr_molc_impl
+
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
new file mode 100644
index 00000000..f8c5c39d
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irp)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_c_oacc_csr_mv_from_coo
+end submodule psb_c_oacc_csr_mv_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
new file mode 100644
index 00000000..7ba971b4
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_c_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+ end subroutine psb_c_oacc_csr_mv_from_fmt
+end submodule psb_c_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90
new file mode 100644
index 00000000..92a53116
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='c_oacc_csr_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_c_csr_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_csr_reallocate_nz
+end submodule psb_c_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_c_oacc_csr_scal.F90 b/openacc/impl/psb_c_oacc_csr_scal.F90
new file mode 100644
index 00000000..5dece48b
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_scal.F90
@@ -0,0 +1,53 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_scal(d, a, info, side)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_nrows()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_ncols()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop present(a, d)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d(i)
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_csr_scal
+end submodule psb_c_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_c_oacc_csr_scals.F90 b/openacc/impl/psb_c_oacc_csr_scals.F90
new file mode 100644
index 00000000..aba22d43
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_scals.F90
@@ -0,0 +1,34 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_scals(d, a, info)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ !$acc parallel loop present(a)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_csr_scals
+end submodule psb_c_oacc_csr_scals_impl
diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
new file mode 100644
index 00000000..b4b79d56
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
@@ -0,0 +1,63 @@
+submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+
+ if ((n /= size(x%v)) .or. (n /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalic_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), irp(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ complex(psb_spk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = irp(i), irp(i + 1) - 1
+ tmp = tmp + val(j) * x(ja(j))
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_c_oacc_csr_vect_mv
+end submodule psb_c_oacc_csr_vect_mv_impl
diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90
new file mode 100644
index 00000000..66c4e865
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_mlt_v.f90
@@ -0,0 +1,31 @@
+
+subroutine c_oacc_mlt_v(x, y, info)
+ use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v
+
+ implicit none
+ class(psb_c_base_vect_type), intent(inout) :: x
+ class(psb_c_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ class is (psb_c_vect_oacc)
+ if (y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
+end subroutine c_oacc_mlt_v
diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90
new file mode 100644
index 00000000..a6bb6cc5
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90
@@ -0,0 +1,98 @@
+subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v_2
+ use psb_string_mod
+ implicit none
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x
+ class(psb_c_base_vect_type), intent(inout) :: y
+ class(psb_c_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
+
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+ info = 0
+ select type(xx => x)
+ class is (psb_c_vect_oacc)
+ select type (yy => y)
+ class is (psb_c_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ if ((beta /= czero) .and. (z%is_host())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+
+ end if
+ call z%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (yy%is_dev()) call yy%sync()
+ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+end subroutine c_oacc_mlt_v_2
+
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
index ab242b57..bedd0247 100644
--- a/openacc/impl/psb_d_oacc_mlt_v.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -1,34 +1,31 @@
-submodule (psb_d_oacc_vect_mod) psb_d_oacc_mlt_v_impl
- use psb_string_mod
-contains
- module subroutine psb_d_oacc_mlt_v(x, y, info)
+subroutine d_oacc_mlt_v(x, y, info)
+ use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v
- implicit none
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_vect_oacc), intent(inout) :: y
- integer(psb_ipk_), intent(out) :: info
+ implicit none
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, n
+ integer(psb_ipk_) :: i, n
- info = 0
- n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
- class is (psb_d_vect_oacc)
- if (y%is_host()) call y%sync()
- if (xx%is_host()) call xx%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
- call y%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (y%is_dev()) call y%sync()
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
- call y%set_host()
- end select
- end subroutine psb_d_oacc_mlt_v
-end submodule psb_d_oacc_mlt_v_impl
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ if (y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
+end subroutine d_oacc_mlt_v
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
index 4ca2bdab..e7dd604f 100644
--- a/openacc/impl/psb_d_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -1,55 +1,98 @@
-submodule (psb_d_oacc_vect_mod) d_oacc_mlt_v_2_impl
+subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2
use psb_string_mod
-contains
- module subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- implicit none
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x
- class(psb_d_base_vect_type), intent(inout) :: y
- class(psb_d_vect_oacc), intent(inout) :: z
- integer(psb_ipk_), intent(out) :: info
- character(len=1), intent(in), optional :: conjgx, conjgy
- integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
+ implicit none
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x
+ class(psb_d_base_vect_type), intent(inout) :: y
+ class(psb_d_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
- conjgx_ = .false.
- conjgy_ = .false.
- if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
- if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
- n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
-
- info = 0
- select type(xx => x)
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+ info = 0
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type (yy => y)
class is (psb_d_vect_oacc)
- select type (yy => y)
- class is (psb_d_vect_oacc)
- if (xx%is_host()) call xx%sync()
- if (yy%is_host()) call yy%sync()
- if ((beta /= dzero) .and. (z%is_host())) call z%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ if ((beta /= dzero) .and. (z%is_host())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
!$acc parallel loop
do i = 1, n
- z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
end do
- call z%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (yy%is_dev()) call yy%sync()
- if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ !$acc parallel loop
do i = 1, n
- z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
end do
- call z%set_host()
- end select
+ else
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+
+ end if
+ call z%set_dev()
class default
- if (x%is_dev()) call x%sync()
- if (y%is_dev()) call y%sync()
+ if (xx%is_dev()) call xx%sync()
+ if (yy%is_dev()) call yy%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
- do i = 1, n
- z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
- end do
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ end if
call z%set_host()
end select
- end subroutine d_oacc_mlt_v_2
-end submodule d_oacc_mlt_v_2_impl
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+end subroutine d_oacc_mlt_v_2
diff --git a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
new file mode 100644
index 00000000..08c51bce
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_s_csr_sparse_mat%allocate(m, n, nz)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz))
+ allocate(a%ja(nz))
+ allocate(a%irp(m+1))
+ end if
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_csr_allocate_mnnz
+end submodule psb_s_oacc_csr_allocate_mnnz_impl
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
new file mode 100644
index 00000000..94ef67b3
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
@@ -0,0 +1,26 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_s_oacc_csr_cp_from_coo
+end submodule psb_s_oacc_csr_cp_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
new file mode 100644
index 00000000..2c64b5fe
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_s_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+ end subroutine psb_s_oacc_csr_cp_from_fmt
+end submodule psb_s_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_csr_csmm.F90 b/openacc/impl/psb_s_oacc_csr_csmm.F90
new file mode 100644
index 00000000..2e7def53
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:,:)
+ real(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n,k, nxy
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalis_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_csr_csmm
+end submodule psb_s_oacc_csr_csmm_impl
+
diff --git a/openacc/impl/psb_s_oacc_csr_csmv.F90 b/openacc/impl/psb_s_oacc_csr_csmv.F90
new file mode 100644
index 00000000..ba673941
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_csmv.F90
@@ -0,0 +1,81 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalis_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, n
+ do j = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_csr_csmv
+end submodule psb_s_oacc_csr_csmv_impl
+
diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..7af897a7
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,83 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ real(psb_spk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_csr_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalis_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_s_vect_oacc)
+ select type(yy => y)
+ type is (psb_s_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val)
+ yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_s_oacc_csr_inner_vect_sv
+end submodule psb_s_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_s_oacc_csr_mold.F90 b/openacc/impl/psb_s_oacc_csr_mold.F90
new file mode 100644
index 00000000..a85471e5
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_mold.F90
@@ -0,0 +1,35 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_mold(a, b, info)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='csr_mold'
+ logical, parameter :: debug=.false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_s_oacc_csr_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_s_oacc_csr_mold
+end submodule psb_s_oacc_csr_mols_impl
+
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
new file mode 100644
index 00000000..e531d309
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irp)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_s_oacc_csr_mv_from_coo
+end submodule psb_s_oacc_csr_mv_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
new file mode 100644
index 00000000..a9dc0c70
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_s_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+ end subroutine psb_s_oacc_csr_mv_from_fmt
+end submodule psb_s_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
new file mode 100644
index 00000000..77c17120
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='s_oacc_csr_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_s_csr_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_csr_reallocate_nz
+end submodule psb_s_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_s_oacc_csr_scal.F90 b/openacc/impl/psb_s_oacc_csr_scal.F90
new file mode 100644
index 00000000..b9c8a986
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_scal.F90
@@ -0,0 +1,53 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_scal(d, a, info, side)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_nrows()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_ncols()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop present(a, d)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d(i)
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_csr_scal
+end submodule psb_s_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_s_oacc_csr_scals.F90 b/openacc/impl/psb_s_oacc_csr_scals.F90
new file mode 100644
index 00000000..76ad7cf2
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_scals.F90
@@ -0,0 +1,34 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_scals(d, a, info)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ !$acc parallel loop present(a)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_csr_scals
+end submodule psb_s_oacc_csr_scals_impl
diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
new file mode 100644
index 00000000..9b15da3b
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
@@ -0,0 +1,63 @@
+submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+
+ if ((n /= size(x%v)) .or. (n /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalis_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), irp(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ real(psb_spk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = irp(i), irp(i + 1) - 1
+ tmp = tmp + val(j) * x(ja(j))
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_s_oacc_csr_vect_mv
+end submodule psb_s_oacc_csr_vect_mv_impl
diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90
new file mode 100644
index 00000000..fb043cf2
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_mlt_v.f90
@@ -0,0 +1,31 @@
+
+subroutine s_oacc_mlt_v(x, y, info)
+ use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v
+
+ implicit none
+ class(psb_s_base_vect_type), intent(inout) :: x
+ class(psb_s_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ class is (psb_s_vect_oacc)
+ if (y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
+end subroutine s_oacc_mlt_v
diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90
new file mode 100644
index 00000000..04ee8e09
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90
@@ -0,0 +1,98 @@
+subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v_2
+ use psb_string_mod
+ implicit none
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x
+ class(psb_s_base_vect_type), intent(inout) :: y
+ class(psb_s_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
+
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+ info = 0
+ select type(xx => x)
+ class is (psb_s_vect_oacc)
+ select type (yy => y)
+ class is (psb_s_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ if ((beta /= szero) .and. (z%is_host())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+
+ end if
+ call z%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (yy%is_dev()) call yy%sync()
+ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+end subroutine s_oacc_mlt_v_2
+
diff --git a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90
new file mode 100644
index 00000000..fd19d6f9
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90
@@ -0,0 +1,35 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_z_csr_sparse_mat%allocate(m, n, nz)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz))
+ allocate(a%ja(nz))
+ allocate(a%irp(m+1))
+ end if
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_csr_allocate_mnnz
+end submodule psb_z_oacc_csr_allocate_mnnz_impl
diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
new file mode 100644
index 00000000..0485c9ca
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
@@ -0,0 +1,26 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_z_csr_sparse_mat%cp_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_z_oacc_csr_cp_from_coo
+end submodule psb_z_oacc_csr_cp_from_coo_impl
diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
new file mode 100644
index 00000000..f2c68816
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_z_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+ end subroutine psb_z_oacc_csr_cp_from_fmt
+end submodule psb_z_oacc_csr_cp_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_csr_csmm.F90 b/openacc/impl/psb_z_oacc_csr_csmm.F90
new file mode 100644
index 00000000..aeaaab33
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:,:)
+ complex(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n,k, nxy
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invaliz_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_csr_csmm
+end submodule psb_z_oacc_csr_csmm_impl
+
diff --git a/openacc/impl/psb_z_oacc_csr_csmv.F90 b/openacc/impl/psb_z_oacc_csr_csmv.F90
new file mode 100644
index 00000000..f5501b21
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_csmv.F90
@@ -0,0 +1,81 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invaliz_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, n
+ do j = a%irp(i), a%irp(i+1) - 1
+ y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_csr_csmv
+end submodule psb_z_oacc_csr_csmv_impl
+
diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
new file mode 100644
index 00000000..b5d552d3
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
@@ -0,0 +1,83 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ complex(psb_dpk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_csr_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invaliz_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_z_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_z_vect_oacc)
+ select type(yy => y)
+ type is (psb_z_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val)
+ yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_z_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_z_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'csrg_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_z_oacc_csr_inner_vect_sv
+end submodule psb_z_oacc_csr_inner_vect_sv_impl
+
diff --git a/openacc/impl/psb_z_oacc_csr_mold.F90 b/openacc/impl/psb_z_oacc_csr_mold.F90
new file mode 100644
index 00000000..e7e9e8b9
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_mold.F90
@@ -0,0 +1,35 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_mold(a, b, info)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='csr_mold'
+ logical, parameter :: debug=.false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_z_oacc_csr_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_z_oacc_csr_mold
+end submodule psb_z_oacc_csr_molz_impl
+
diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
new file mode 100644
index 00000000..44b01b68
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_z_csr_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irp)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_z_oacc_csr_mv_from_coo
+end submodule psb_z_oacc_csr_mv_from_coo_impl
diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
new file mode 100644
index 00000000..bf777e85
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_z_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irp)
+ end select
+
+ end subroutine psb_z_oacc_csr_mv_from_fmt
+end submodule psb_z_oacc_csr_mv_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90
new file mode 100644
index 00000000..bdfb88d6
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='z_oacc_csr_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_z_csr_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_csr_reallocate_nz
+end submodule psb_z_oacc_csr_reallocate_nz_impl
diff --git a/openacc/impl/psb_z_oacc_csr_scal.F90 b/openacc/impl/psb_z_oacc_csr_scal.F90
new file mode 100644
index 00000000..f09ff595
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_scal.F90
@@ -0,0 +1,53 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_scal(d, a, info, side)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_nrows()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop present(a, d)
+ do i = 1, a%get_ncols()
+ do j = a%irp(i), a%irp(i+1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop present(a, d)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d(i)
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_csr_scal
+end submodule psb_z_oacc_csr_scal_impl
diff --git a/openacc/impl/psb_z_oacc_csr_scals.F90 b/openacc/impl/psb_z_oacc_csr_scals.F90
new file mode 100644
index 00000000..1fe64951
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_scals.F90
@@ -0,0 +1,34 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_scals(d, a, info)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ !$acc parallel loop present(a)
+ do i = 1, size(a%val)
+ a%val(i) = a%val(i) * d
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_csr_scals
+end submodule psb_z_oacc_csr_scals_impl
diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
new file mode 100644
index 00000000..437dd70a
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
@@ -0,0 +1,63 @@
+submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+
+ if ((n /= size(x%v)) .or. (n /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invaliz_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), irp(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ complex(psb_dpk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = irp(i), irp(i + 1) - 1
+ tmp = tmp + val(j) * x(ja(j))
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_z_oacc_csr_vect_mv
+end submodule psb_z_oacc_csr_vect_mv_impl
diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90
new file mode 100644
index 00000000..7018f009
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_mlt_v.f90
@@ -0,0 +1,31 @@
+
+subroutine z_oacc_mlt_v(x, y, info)
+ use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v
+
+ implicit none
+ class(psb_z_base_vect_type), intent(inout) :: x
+ class(psb_z_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
+ select type(xx => x)
+ class is (psb_z_vect_oacc)
+ if (y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
+ !$acc parallel loop
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (y%is_dev()) call y%sync()
+ do i = 1, n
+ y%v(i) = y%v(i) * xx%v(i)
+ end do
+ call y%set_host()
+ end select
+end subroutine z_oacc_mlt_v
diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90
new file mode 100644
index 00000000..dbc0929c
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90
@@ -0,0 +1,98 @@
+subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v_2
+ use psb_string_mod
+ implicit none
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x
+ class(psb_z_base_vect_type), intent(inout) :: y
+ class(psb_z_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ integer(psb_ipk_) :: i, n
+ logical :: conjgx_, conjgy_
+
+ conjgx_ = .false.
+ conjgy_ = .false.
+ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+
+ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+ info = 0
+ select type(xx => x)
+ class is (psb_z_vect_oacc)
+ select type (yy => y)
+ class is (psb_z_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ if ((beta /= zzero) .and. (z%is_host())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ !$acc parallel loop
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+
+ end if
+ call z%set_dev()
+ class default
+ if (xx%is_dev()) call xx%sync()
+ if (yy%is_dev()) call yy%sync()
+ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
+ call z%set_host()
+ end select
+end subroutine z_oacc_mlt_v_2
+
diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90
new file mode 100644
index 00000000..00e79570
--- /dev/null
+++ b/openacc/psb_c_oacc_csr_mat_mod.F90
@@ -0,0 +1,343 @@
+module psb_c_oacc_csr_mat_mod
+
+ use iso_c_binding
+ use psb_c_mat_mod
+ use psb_c_oacc_vect_mod
+ !use oaccsparse_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_c_csr_sparse_mat) :: psb_c_oacc_csr_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => c_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => c_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_c_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_c_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => c_oacc_csr_free
+ procedure, pass(a) :: mold => psb_c_oacc_csr_mold
+ procedure, pass(a) :: all => c_oacc_csr_all
+ procedure, pass(a) :: is_host => c_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => c_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => c_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => c_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => c_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => c_oacc_csr_set_dev
+ procedure, pass(a) :: sync_space => c_oacc_csr_sync_space
+ procedure, pass(a) :: sync => c_oacc_csr_sync
+ end type psb_c_oacc_csr_sparse_mat
+
+ interface
+ module subroutine psb_c_oacc_csr_mold(a,b,info)
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_csr_mold
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_cp_from_fmt(a,b,info)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_csr_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_mv_from_coo(a,b,info)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_csr_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_mv_from_fmt(a,b,info)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_csr_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_csr_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_csr_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+ complex(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_csr_csmm
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+ complex(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_csr_csmv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_scals(d, a, info)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_csr_scals
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_scal(d,a,info,side)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_c_oacc_csr_scal
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_reallocate_nz(nz,a)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_c_oacc_csr_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_allocate_mnnz(m,n,a,nz)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_c_oacc_csr_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_csr_cp_from_coo(a,b,info)
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_csr_cp_from_coo
+ end interface
+
+contains
+
+
+ subroutine c_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp)
+ end if
+
+ call a%psb_c_csr_sparse_mat%free()
+
+ return
+ end subroutine c_oacc_csr_free
+
+ function c_oacc_csr_sizeof(a) result(res)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + (2*psb_sizeof_sp) * size(a%val)
+ res = res + psb_sizeof_ip * size(a%irp)
+ res = res + psb_sizeof_ip * size(a%ja)
+
+ end function c_oacc_csr_sizeof
+
+
+ function c_oacc_csr_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'CSR_oacc'
+ end function c_oacc_csr_get_fmt
+
+ subroutine c_oacc_csr_all(m, n, nz, a, info)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n, nz
+ class(psb_c_oacc_csr_sparse_mat), intent(out) :: a
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val) finalize
+ deallocate(a%val, stat=info)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja) finalize
+ deallocate(a%ja, stat=info)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp) finalize
+ deallocate(a%irp, stat=info)
+ end if
+
+ call a%set_nrows(m)
+ call a%set_ncols(n)
+
+ allocate(a%val(nz),stat=info)
+ allocate(a%ja(nz),stat=info)
+ allocate(a%irp(m+1),stat=info)
+ if (info == 0) call a%set_host()
+ if (info == 0) call a%sync_space()
+ end subroutine c_oacc_csr_all
+
+ function c_oacc_csr_is_host(a) result(res)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function c_oacc_csr_is_host
+
+ function c_oacc_csr_is_sync(a) result(res)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function c_oacc_csr_is_sync
+
+ function c_oacc_csr_is_dev(a) result(res)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function c_oacc_csr_is_dev
+
+ subroutine c_oacc_csr_set_host(a)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine c_oacc_csr_set_host
+
+ subroutine c_oacc_csr_set_sync(a)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine c_oacc_csr_set_sync
+
+ subroutine c_oacc_csr_set_dev(a)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine c_oacc_csr_set_dev
+
+ subroutine c_oacc_csr_sync_space(a)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ if (allocated(a%val)) then
+ call c_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ call i_oacc_create_dev(a%irp)
+ end if
+ contains
+ subroutine c_oacc_create_dev(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine c_oacc_create_dev
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+ end subroutine c_oacc_csr_sync_space
+
+ subroutine c_oacc_csr_sync(a)
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), target, intent(in) :: a
+ class(psb_c_oacc_csr_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call c_oacc_csr_to_host(a%val)
+ call i_oacc_csr_to_host(a%ja)
+ call i_oacc_csr_to_host(a%irp)
+ else if (a%is_host()) then
+ call c_oacc_csr_to_dev(a%val)
+ call i_oacc_csr_to_dev(a%ja)
+ call i_oacc_csr_to_dev(a%irp)
+ end if
+ call tmpa%set_sync()
+ end subroutine c_oacc_csr_sync
+
+ subroutine c_oacc_csr_to_dev(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine c_oacc_csr_to_dev
+
+ subroutine c_oacc_csr_to_host(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine c_oacc_csr_to_host
+
+ subroutine i_oacc_csr_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_csr_to_dev
+
+ subroutine i_oacc_csr_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_csr_to_host
+
+end module psb_c_oacc_csr_mat_mod
+
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
new file mode 100644
index 00000000..6f9545ea
--- /dev/null
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -0,0 +1,935 @@
+module psb_c_oacc_vect_mod
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_c_vect_mod
+ use psb_i_vect_mod
+ use psb_i_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_c_base_vect_type) :: psb_c_vect_oacc
+ integer :: state = is_host
+
+ contains
+ procedure, pass(x) :: get_nrows => c_oacc_get_nrows
+ procedure, nopass :: get_fmt => c_oacc_get_fmt
+
+ procedure, pass(x) :: all => c_oacc_vect_all
+ procedure, pass(x) :: zero => c_oacc_zero
+ procedure, pass(x) :: asb_m => c_oacc_asb_m
+ procedure, pass(x) :: sync => c_oacc_sync
+ procedure, pass(x) :: sync_space => c_oacc_sync_space
+ procedure, pass(x) :: bld_x => c_oacc_bld_x
+ procedure, pass(x) :: bld_mn => c_oacc_bld_mn
+ procedure, pass(x) :: free => c_oacc_vect_free
+ procedure, pass(x) :: ins_a => c_oacc_ins_a
+ procedure, pass(x) :: ins_v => c_oacc_ins_v
+ procedure, pass(x) :: is_host => c_oacc_is_host
+ procedure, pass(x) :: is_dev => c_oacc_is_dev
+ procedure, pass(x) :: is_sync => c_oacc_is_sync
+ procedure, pass(x) :: set_host => c_oacc_set_host
+ procedure, pass(x) :: set_dev => c_oacc_set_dev
+ procedure, pass(x) :: set_sync => c_oacc_set_sync
+ procedure, pass(x) :: set_scal => c_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf
+ procedure, pass(y) :: sctb => c_oacc_sctb
+ procedure, pass(y) :: sctb_x => c_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => c_oacc_get_size
+ procedure, pass(x) :: dot_v => c_oacc_vect_dot
+ procedure, pass(x) :: dot_a => c_oacc_dot_a
+ procedure, pass(y) :: axpby_v => c_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => c_oacc_axpby_a
+ procedure, pass(z) :: abgdxyz => c_oacc_abgdxyz
+ procedure, pass(y) :: mlt_a => c_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => c_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => c_oacc_mlt_v_2
+ procedure, pass(x) :: scal => c_oacc_scal
+ procedure, pass(x) :: nrm2 => c_oacc_nrm2
+ procedure, pass(x) :: amax => c_oacc_amax
+ procedure, pass(x) :: asum => c_oacc_asum
+ procedure, pass(x) :: absval1 => c_oacc_absval1
+ procedure, pass(x) :: absval2 => c_oacc_absval2
+
+ end type psb_c_vect_oacc
+
+ interface
+ subroutine c_oacc_mlt_v(x, y, info)
+ import
+ implicit none
+ class(psb_c_base_vect_type), intent(inout) :: x
+ class(psb_c_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine c_oacc_mlt_v
+ end interface
+
+
+ interface
+ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ import
+ implicit none
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x
+ class(psb_c_base_vect_type), intent(inout) :: y
+ class(psb_c_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ end subroutine c_oacc_mlt_v_2
+ end interface
+
+contains
+
+ subroutine c_oacc_absval1(x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: n, i
+
+ if (x%is_host()) call x%sync_space()
+ n = size(x%v)
+ !$acc parallel loop
+ do i = 1, n
+ x%v(i) = abs(x%v(i))
+ end do
+ call x%set_dev()
+ end subroutine c_oacc_absval1
+
+ subroutine c_oacc_absval2(x, y)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ class(psb_c_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+
+ n = min(size(x%v), size(y%v))
+ select type (yy => y)
+ class is (psb_c_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+ !$acc parallel loop
+ do i = 1, n
+ yy%v(i) = abs(x%v(i))
+ end do
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call x%psb_c_base_vect_type%absval(y)
+ end select
+ end subroutine c_oacc_absval2
+
+ subroutine c_oacc_scal(alpha, x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ complex(psb_spk_), intent(in) :: alpha
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x%v)
+ x%v(i) = alpha * x%v(i)
+ end do
+ call x%set_dev()
+ end subroutine c_oacc_scal
+
+ function c_oacc_nrm2(n, x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_spk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))**2
+ end do
+ res = sqrt(sum)
+ end function c_oacc_nrm2
+
+ function c_oacc_amax(n, x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_spk_) :: max_val
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
+ end do
+ res = max_val
+ end function c_oacc_amax
+
+ function c_oacc_asum(n, x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: info
+ complex(psb_spk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))
+ end do
+ res = sum
+ end function c_oacc_asum
+
+
+ subroutine c_oacc_mlt_a(x, y, info)
+ implicit none
+ complex(psb_spk_), intent(in) :: x(:)
+ class(psb_c_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (y%is_dev()) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ y%v(i) = y%v(i) * x(i)
+ end do
+ call y%set_host()
+ end subroutine c_oacc_mlt_a
+
+ subroutine c_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+ implicit none
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_), intent(in) :: x(:)
+ complex(psb_spk_), intent(in) :: y(:)
+ class(psb_c_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (z%is_dev()) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end subroutine c_oacc_mlt_a_2
+
+
+!!$ subroutine c_oacc_mlt_v(x, y, info)
+!!$ implicit none
+!!$ class(psb_c_base_vect_type), intent(inout) :: x
+!!$ class(psb_c_vect_oacc), intent(inout) :: y
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ integer(psb_ipk_) :: i, n
+!!$
+!!$ info = 0
+!!$ n = min(x%get_nrows(), y%get_nrows())
+!!$ select type(xx => x)
+!!$ type is (psb_c_base_vect_type)
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end select
+!!$ end subroutine c_oacc_mlt_v
+!!$
+!!$ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+!!$ use psi_serial_mod
+!!$ use psb_string_mod
+!!$ implicit none
+!!$ complex(psb_spk_), intent(in) :: alpha, beta
+!!$ class(psb_c_base_vect_type), intent(inout) :: x
+!!$ class(psb_c_base_vect_type), intent(inout) :: y
+!!$ class(psb_c_vect_oacc), intent(inout) :: z
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character(len=1), intent(in), optional :: conjgx, conjgy
+!!$ integer(psb_ipk_) :: i, n
+!!$ logical :: conjgx_, conjgy_
+!!$
+!!$ conjgx_ = .false.
+!!$ conjgy_ = .false.
+!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+!!$
+!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+!!$
+!!$ info = 0
+!!$ select type(xx => x)
+!!$ class is (psb_c_vect_oacc)
+!!$ select type (yy => y)
+!!$ class is (psb_c_vect_oacc)
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ if (yy%is_host()) call yy%sync_space()
+!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_dev()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (yy%is_dev()) call yy%sync()
+!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ class default
+!!$ if (x%is_dev()) call x%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ end subroutine c_oacc_mlt_v_2
+
+
+ subroutine c_oacc_axpby_v(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_c_base_vect_type), intent(inout) :: x
+ class(psb_c_vect_oacc), intent(inout) :: y
+ complex(psb_spk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, i
+
+ info = psb_success_
+
+ select type(xx => x)
+ type is (psb_c_vect_oacc)
+ if ((beta /= czero) .and. y%is_host()) call y%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(y%v)
+ if ((nx < m) .or. (ny < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
+ end do
+ end if
+ call y%set_dev()
+ class default
+ if ((alpha /= czero) .and. (x%is_dev())) call x%sync()
+ call y%axpby(m, alpha, x%v, beta, info)
+ end select
+ end subroutine c_oacc_axpby_v
+
+ subroutine c_oacc_axpby_a(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ complex(psb_spk_), intent(in) :: x(:)
+ class(psb_c_vect_oacc), intent(inout) :: y
+ complex(psb_spk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i
+
+ if ((beta /= czero) .and. (y%is_dev())) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * x(i) + beta * y%v(i)
+ end do
+ call y%set_host()
+ end subroutine c_oacc_axpby_a
+
+ subroutine c_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_c_base_vect_type), intent(inout) :: x
+ class(psb_c_base_vect_type), intent(inout) :: y
+ class(psb_c_vect_oacc), intent(inout) :: z
+ complex(psb_spk_), intent(in) :: alpha, beta, gamma, delta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, nz, i
+ logical :: gpu_done
+
+ info = psb_success_
+ gpu_done = .false.
+
+ select type(xx => x)
+ class is (psb_c_vect_oacc)
+ select type(yy => y)
+ class is (psb_c_vect_oacc)
+ select type(zz => z)
+ class is (psb_c_vect_oacc)
+ if ((beta /= czero) .and. yy%is_host()) call yy%sync_space()
+ if ((delta /= czero) .and. zz%is_host()) call zz%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
+ end select
+ end select
+ end select
+
+ if (.not. gpu_done) then
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
+ end if
+ end subroutine c_oacc_abgdxyz
+
+ subroutine c_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ complex(psb_spk_) :: beta
+ class(psb_c_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine c_oacc_sctb_buf
+
+ subroutine c_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ complex(psb_spk_) :: beta, x(:)
+ class(psb_c_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'c_oacc_sctb_x')
+ return
+ end select
+
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+
+ call y%set_dev()
+ end subroutine c_oacc_sctb_x
+
+
+
+ subroutine c_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ complex(psb_spk_) :: beta, x(:)
+ class(psb_c_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine c_oacc_sctb
+
+
+ subroutine c_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_c_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'c_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine c_oacc_gthzbuf
+
+ subroutine c_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ complex(psb_spk_) :: y(:)
+ class(psb_c_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'c_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine c_oacc_gthzv_x
+
+ subroutine c_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_c_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_c_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space(info)
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ x%v(virl%v(i)) = vval%v(i)
+ end do
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space(info)
+ end select
+ select type(vval => val)
+ type is (psb_c_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine c_oacc_ins_v
+
+
+
+ subroutine c_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ complex(psb_spk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine c_oacc_ins_a
+
+
+
+ subroutine c_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine c_oacc_bld_mn
+
+
+ subroutine c_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ complex(psb_spk_), intent(in) :: this(:)
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'c_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine c_oacc_bld_x
+
+
+ subroutine c_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_c_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_c_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ end if
+ end subroutine c_oacc_asb_m
+
+
+
+ subroutine c_oacc_set_scal(x, val, first, last)
+ class(psb_c_vect_oacc), intent(inout) :: x
+ complex(psb_spk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine c_oacc_set_scal
+
+
+
+ subroutine c_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(czero)
+ end subroutine c_oacc_zero
+
+ function c_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function c_oacc_get_nrows
+
+ function c_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "cOACC"
+
+ end function c_oacc_get_fmt
+
+ function c_oacc_vect_dot(n, x, y) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ class(psb_c_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_spk_) :: res
+ complex(psb_spk_), external :: ddot
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ res = czero
+
+ select type(yy => y)
+ type is (psb_c_base_vect_type)
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, x%v, 1, yy%v, 1)
+ type is (psb_c_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+
+ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
+ do i = 1, n
+ res = res + x%v(i) * yy%v(i)
+ end do
+ !$acc end parallel loop
+
+ class default
+ call x%sync()
+ res = y%dot(n, x%v)
+ end select
+
+ end function c_oacc_vect_dot
+
+
+
+
+ function c_oacc_dot_a(n, x, y) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ complex(psb_spk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_spk_) :: res
+ complex(psb_spk_), external :: ddot
+
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, y, 1, x%v, 1)
+
+ end function c_oacc_dot_a
+
+ ! subroutine c_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_c_vect_oacc), intent(inout) :: x
+ ! complex(psb_spk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine c_oacc_set_vect
+
+ subroutine c_oacc_to_dev(v)
+ implicit none
+ complex(psb_spk_) :: v(:)
+ !$acc update device(v)
+ end subroutine c_oacc_to_dev
+
+ subroutine c_oacc_to_host(v)
+ implicit none
+ complex(psb_spk_) :: v(:)
+ !$acc update self(v)
+ end subroutine c_oacc_to_host
+
+ subroutine c_oacc_sync_space(x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ if (allocated(x%v)) then
+ call c_oacc_create_dev(x%v)
+ end if
+ contains
+ subroutine c_oacc_create_dev(v)
+ implicit none
+ complex(psb_spk_) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine c_oacc_create_dev
+ end subroutine c_oacc_sync_space
+
+ subroutine c_oacc_sync(x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ if (x%is_dev()) then
+ call c_oacc_to_host(x%v)
+ end if
+ if (x%is_host()) then
+ call c_oacc_to_dev(x%v)
+ end if
+ call x%set_sync()
+ end subroutine c_oacc_sync
+
+ subroutine c_oacc_set_host(x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+
+ x%state = is_host
+ end subroutine c_oacc_set_host
+
+ subroutine c_oacc_set_dev(x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+
+ x%state = is_dev
+ end subroutine c_oacc_set_dev
+
+ subroutine c_oacc_set_sync(x)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+
+ x%state = is_sync
+ end subroutine c_oacc_set_sync
+
+ function c_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function c_oacc_is_dev
+
+ function c_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function c_oacc_is_host
+
+ function c_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function c_oacc_is_sync
+
+ subroutine c_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_c_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'c_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine c_oacc_vect_all
+
+
+ subroutine c_oacc_vect_free(x, info)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine c_oacc_vect_free
+
+ function c_oacc_get_size(x) result(res)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function c_oacc_get_size
+
+end module psb_c_oacc_vect_mod
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index ca4199a8..8b7e111e 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -175,8 +175,6 @@ contains
return
end subroutine d_oacc_csr_free
-
-
function d_oacc_csr_sizeof(a) result(res)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
@@ -341,6 +339,5 @@ contains
!$acc update self(v)
end subroutine i_oacc_csr_to_host
-
end module psb_d_oacc_csr_mat_mod
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 3385f1ec..7d51766d 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -41,7 +41,7 @@ module psb_d_oacc_vect_mod
procedure, pass(y) :: sctb_x => d_oacc_sctb_x
procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
- procedure, pass(x) :: get_size => oacc_get_size
+ procedure, pass(x) :: get_size => d_oacc_get_size
procedure, pass(x) :: dot_v => d_oacc_vect_dot
procedure, pass(x) :: dot_a => d_oacc_dot_a
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
@@ -49,8 +49,8 @@ module psb_d_oacc_vect_mod
procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2
+ procedure, pass(y) :: mlt_v => d_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
procedure, pass(x) :: scal => d_oacc_scal
procedure, pass(x) :: nrm2 => d_oacc_nrm2
procedure, pass(x) :: amax => d_oacc_amax
@@ -60,20 +60,20 @@ module psb_d_oacc_vect_mod
end type psb_d_vect_oacc
- real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
-
interface
- module subroutine psb_d_oacc_mlt_v(x, y, info)
+ subroutine d_oacc_mlt_v(x, y, info)
+ import
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_mlt_v
+ end subroutine d_oacc_mlt_v
end interface
interface
- module subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ import
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x
@@ -81,7 +81,7 @@ module psb_d_oacc_vect_mod
class(psb_d_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
- end subroutine psb_d_oacc_mlt_v_2
+ end subroutine d_oacc_mlt_v_2
end interface
contains
@@ -151,7 +151,7 @@ contains
sum = 0.0
!$acc parallel loop reduction(+:sum)
do i = 1, n
- sum = sum + x%v(i) * x%v(i)
+ sum = sum + abs(x%v(i))**2
end do
res = sqrt(sum)
end function d_oacc_nrm2
@@ -169,7 +169,7 @@ contains
max_val = -huge(0.0)
!$acc parallel loop reduction(max:max_val)
do i = 1, n
- if (x%v(i) > max_val) max_val = x%v(i)
+ if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
end do
res = max_val
end function d_oacc_amax
@@ -923,41 +923,13 @@ contains
end subroutine d_oacc_vect_free
- function oacc_get_size(x) result(res)
+ function d_oacc_get_size(x) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
- end function oacc_get_size
-
-!!$
-!!$ subroutine initialize(N)
-!!$ integer(psb_ipk_) :: N
-!!$ integer(psb_ipk_) :: i
-!!$ allocate(v1(N),v2(N),p(N))
-!!$ !$acc enter data create(v1,v2,p)
-!!$ !$acc parallel
-!!$ !$acc loop
-!!$ do i=1,n
-!!$ v1(i) = i
-!!$ v2(i) = n+i
-!!$ end do
-!!$ !$acc end parallel
-!!$ end subroutine initialize
-!!$ subroutine finalize_dev()
-!!$ !$acc exit data delete(v1,v2,p)
-!!$ end subroutine finalize_dev
-!!$ subroutine finalize_host()
-!!$ deallocate(v1,v2,p)
-!!$ end subroutine finalize_host
-!!$ subroutine to_dev()
-!!$ !$acc update device(v1,v2)
-!!$ end subroutine to_dev
-!!$ subroutine to_host()
-!!$ !$acc update self(v1,v2)
-!!$ end subroutine to_host
-!!$
+ end function d_oacc_get_size
end module psb_d_oacc_vect_mod
diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90
new file mode 100644
index 00000000..89b10d08
--- /dev/null
+++ b/openacc/psb_s_oacc_csr_mat_mod.F90
@@ -0,0 +1,343 @@
+module psb_s_oacc_csr_mat_mod
+
+ use iso_c_binding
+ use psb_s_mat_mod
+ use psb_s_oacc_vect_mod
+ !use oaccsparse_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_s_csr_sparse_mat) :: psb_s_oacc_csr_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => s_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => s_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_s_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_s_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => s_oacc_csr_free
+ procedure, pass(a) :: mold => psb_s_oacc_csr_mold
+ procedure, pass(a) :: all => s_oacc_csr_all
+ procedure, pass(a) :: is_host => s_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => s_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => s_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => s_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => s_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => s_oacc_csr_set_dev
+ procedure, pass(a) :: sync_space => s_oacc_csr_sync_space
+ procedure, pass(a) :: sync => s_oacc_csr_sync
+ end type psb_s_oacc_csr_sparse_mat
+
+ interface
+ module subroutine psb_s_oacc_csr_mold(a,b,info)
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_csr_mold
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_cp_from_fmt(a,b,info)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_csr_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_mv_from_coo(a,b,info)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_csr_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_mv_from_fmt(a,b,info)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_csr_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_csr_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_csr_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_csr_csmm
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta, x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_csr_csmv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_scals(d, a, info)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_csr_scals
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_scal(d,a,info,side)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_s_oacc_csr_scal
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_reallocate_nz(nz,a)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_s_oacc_csr_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_allocate_mnnz(m,n,a,nz)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_s_oacc_csr_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_csr_cp_from_coo(a,b,info)
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_csr_cp_from_coo
+ end interface
+
+contains
+
+
+ subroutine s_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp)
+ end if
+
+ call a%psb_s_csr_sparse_mat%free()
+
+ return
+ end subroutine s_oacc_csr_free
+
+ function s_oacc_csr_sizeof(a) result(res)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_sp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%irp)
+ res = res + psb_sizeof_ip * size(a%ja)
+
+ end function s_oacc_csr_sizeof
+
+
+ function s_oacc_csr_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'CSR_oacc'
+ end function s_oacc_csr_get_fmt
+
+ subroutine s_oacc_csr_all(m, n, nz, a, info)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n, nz
+ class(psb_s_oacc_csr_sparse_mat), intent(out) :: a
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val) finalize
+ deallocate(a%val, stat=info)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja) finalize
+ deallocate(a%ja, stat=info)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp) finalize
+ deallocate(a%irp, stat=info)
+ end if
+
+ call a%set_nrows(m)
+ call a%set_ncols(n)
+
+ allocate(a%val(nz),stat=info)
+ allocate(a%ja(nz),stat=info)
+ allocate(a%irp(m+1),stat=info)
+ if (info == 0) call a%set_host()
+ if (info == 0) call a%sync_space()
+ end subroutine s_oacc_csr_all
+
+ function s_oacc_csr_is_host(a) result(res)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function s_oacc_csr_is_host
+
+ function s_oacc_csr_is_sync(a) result(res)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function s_oacc_csr_is_sync
+
+ function s_oacc_csr_is_dev(a) result(res)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function s_oacc_csr_is_dev
+
+ subroutine s_oacc_csr_set_host(a)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine s_oacc_csr_set_host
+
+ subroutine s_oacc_csr_set_sync(a)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine s_oacc_csr_set_sync
+
+ subroutine s_oacc_csr_set_dev(a)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine s_oacc_csr_set_dev
+
+ subroutine s_oacc_csr_sync_space(a)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ if (allocated(a%val)) then
+ call s_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ call i_oacc_create_dev(a%irp)
+ end if
+ contains
+ subroutine s_oacc_create_dev(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine s_oacc_create_dev
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+ end subroutine s_oacc_csr_sync_space
+
+ subroutine s_oacc_csr_sync(a)
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), target, intent(in) :: a
+ class(psb_s_oacc_csr_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call s_oacc_csr_to_host(a%val)
+ call i_oacc_csr_to_host(a%ja)
+ call i_oacc_csr_to_host(a%irp)
+ else if (a%is_host()) then
+ call s_oacc_csr_to_dev(a%val)
+ call i_oacc_csr_to_dev(a%ja)
+ call i_oacc_csr_to_dev(a%irp)
+ end if
+ call tmpa%set_sync()
+ end subroutine s_oacc_csr_sync
+
+ subroutine s_oacc_csr_to_dev(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine s_oacc_csr_to_dev
+
+ subroutine s_oacc_csr_to_host(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine s_oacc_csr_to_host
+
+ subroutine i_oacc_csr_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_csr_to_dev
+
+ subroutine i_oacc_csr_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_csr_to_host
+
+end module psb_s_oacc_csr_mat_mod
+
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
new file mode 100644
index 00000000..36ae7da8
--- /dev/null
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -0,0 +1,935 @@
+module psb_s_oacc_vect_mod
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_s_vect_mod
+ use psb_i_vect_mod
+ use psb_i_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_s_base_vect_type) :: psb_s_vect_oacc
+ integer :: state = is_host
+
+ contains
+ procedure, pass(x) :: get_nrows => s_oacc_get_nrows
+ procedure, nopass :: get_fmt => s_oacc_get_fmt
+
+ procedure, pass(x) :: all => s_oacc_vect_all
+ procedure, pass(x) :: zero => s_oacc_zero
+ procedure, pass(x) :: asb_m => s_oacc_asb_m
+ procedure, pass(x) :: sync => s_oacc_sync
+ procedure, pass(x) :: sync_space => s_oacc_sync_space
+ procedure, pass(x) :: bld_x => s_oacc_bld_x
+ procedure, pass(x) :: bld_mn => s_oacc_bld_mn
+ procedure, pass(x) :: free => s_oacc_vect_free
+ procedure, pass(x) :: ins_a => s_oacc_ins_a
+ procedure, pass(x) :: ins_v => s_oacc_ins_v
+ procedure, pass(x) :: is_host => s_oacc_is_host
+ procedure, pass(x) :: is_dev => s_oacc_is_dev
+ procedure, pass(x) :: is_sync => s_oacc_is_sync
+ procedure, pass(x) :: set_host => s_oacc_set_host
+ procedure, pass(x) :: set_dev => s_oacc_set_dev
+ procedure, pass(x) :: set_sync => s_oacc_set_sync
+ procedure, pass(x) :: set_scal => s_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf
+ procedure, pass(y) :: sctb => s_oacc_sctb
+ procedure, pass(y) :: sctb_x => s_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => s_oacc_get_size
+ procedure, pass(x) :: dot_v => s_oacc_vect_dot
+ procedure, pass(x) :: dot_a => s_oacc_dot_a
+ procedure, pass(y) :: axpby_v => s_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => s_oacc_axpby_a
+ procedure, pass(z) :: abgdxyz => s_oacc_abgdxyz
+ procedure, pass(y) :: mlt_a => s_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => s_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => s_oacc_mlt_v_2
+ procedure, pass(x) :: scal => s_oacc_scal
+ procedure, pass(x) :: nrm2 => s_oacc_nrm2
+ procedure, pass(x) :: amax => s_oacc_amax
+ procedure, pass(x) :: asum => s_oacc_asum
+ procedure, pass(x) :: absval1 => s_oacc_absval1
+ procedure, pass(x) :: absval2 => s_oacc_absval2
+
+ end type psb_s_vect_oacc
+
+ interface
+ subroutine s_oacc_mlt_v(x, y, info)
+ import
+ implicit none
+ class(psb_s_base_vect_type), intent(inout) :: x
+ class(psb_s_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine s_oacc_mlt_v
+ end interface
+
+
+ interface
+ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ import
+ implicit none
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x
+ class(psb_s_base_vect_type), intent(inout) :: y
+ class(psb_s_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ end subroutine s_oacc_mlt_v_2
+ end interface
+
+contains
+
+ subroutine s_oacc_absval1(x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: n, i
+
+ if (x%is_host()) call x%sync_space()
+ n = size(x%v)
+ !$acc parallel loop
+ do i = 1, n
+ x%v(i) = abs(x%v(i))
+ end do
+ call x%set_dev()
+ end subroutine s_oacc_absval1
+
+ subroutine s_oacc_absval2(x, y)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ class(psb_s_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+
+ n = min(size(x%v), size(y%v))
+ select type (yy => y)
+ class is (psb_s_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+ !$acc parallel loop
+ do i = 1, n
+ yy%v(i) = abs(x%v(i))
+ end do
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call x%psb_s_base_vect_type%absval(y)
+ end select
+ end subroutine s_oacc_absval2
+
+ subroutine s_oacc_scal(alpha, x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ real(psb_spk_), intent(in) :: alpha
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x%v)
+ x%v(i) = alpha * x%v(i)
+ end do
+ call x%set_dev()
+ end subroutine s_oacc_scal
+
+ function s_oacc_nrm2(n, x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_spk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))**2
+ end do
+ res = sqrt(sum)
+ end function s_oacc_nrm2
+
+ function s_oacc_amax(n, x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_spk_) :: max_val
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
+ end do
+ res = max_val
+ end function s_oacc_amax
+
+ function s_oacc_asum(n, x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_spk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))
+ end do
+ res = sum
+ end function s_oacc_asum
+
+
+ subroutine s_oacc_mlt_a(x, y, info)
+ implicit none
+ real(psb_spk_), intent(in) :: x(:)
+ class(psb_s_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (y%is_dev()) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ y%v(i) = y%v(i) * x(i)
+ end do
+ call y%set_host()
+ end subroutine s_oacc_mlt_a
+
+ subroutine s_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+ implicit none
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:)
+ real(psb_spk_), intent(in) :: y(:)
+ class(psb_s_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (z%is_dev()) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end subroutine s_oacc_mlt_a_2
+
+
+!!$ subroutine s_oacc_mlt_v(x, y, info)
+!!$ implicit none
+!!$ class(psb_s_base_vect_type), intent(inout) :: x
+!!$ class(psb_s_vect_oacc), intent(inout) :: y
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ integer(psb_ipk_) :: i, n
+!!$
+!!$ info = 0
+!!$ n = min(x%get_nrows(), y%get_nrows())
+!!$ select type(xx => x)
+!!$ type is (psb_s_base_vect_type)
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end select
+!!$ end subroutine s_oacc_mlt_v
+!!$
+!!$ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+!!$ use psi_serial_mod
+!!$ use psb_string_mod
+!!$ implicit none
+!!$ real(psb_spk_), intent(in) :: alpha, beta
+!!$ class(psb_s_base_vect_type), intent(inout) :: x
+!!$ class(psb_s_base_vect_type), intent(inout) :: y
+!!$ class(psb_s_vect_oacc), intent(inout) :: z
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character(len=1), intent(in), optional :: conjgx, conjgy
+!!$ integer(psb_ipk_) :: i, n
+!!$ logical :: conjgx_, conjgy_
+!!$
+!!$ conjgx_ = .false.
+!!$ conjgy_ = .false.
+!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+!!$
+!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+!!$
+!!$ info = 0
+!!$ select type(xx => x)
+!!$ class is (psb_s_vect_oacc)
+!!$ select type (yy => y)
+!!$ class is (psb_s_vect_oacc)
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ if (yy%is_host()) call yy%sync_space()
+!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_dev()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (yy%is_dev()) call yy%sync()
+!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ class default
+!!$ if (x%is_dev()) call x%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ end subroutine s_oacc_mlt_v_2
+
+
+ subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_s_base_vect_type), intent(inout) :: x
+ class(psb_s_vect_oacc), intent(inout) :: y
+ real(psb_spk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, i
+
+ info = psb_success_
+
+ select type(xx => x)
+ type is (psb_s_vect_oacc)
+ if ((beta /= szero) .and. y%is_host()) call y%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(y%v)
+ if ((nx < m) .or. (ny < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
+ end do
+ end if
+ call y%set_dev()
+ class default
+ if ((alpha /= szero) .and. (x%is_dev())) call x%sync()
+ call y%axpby(m, alpha, x%v, beta, info)
+ end select
+ end subroutine s_oacc_axpby_v
+
+ subroutine s_oacc_axpby_a(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ real(psb_spk_), intent(in) :: x(:)
+ class(psb_s_vect_oacc), intent(inout) :: y
+ real(psb_spk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i
+
+ if ((beta /= szero) .and. (y%is_dev())) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * x(i) + beta * y%v(i)
+ end do
+ call y%set_host()
+ end subroutine s_oacc_axpby_a
+
+ subroutine s_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_s_base_vect_type), intent(inout) :: x
+ class(psb_s_base_vect_type), intent(inout) :: y
+ class(psb_s_vect_oacc), intent(inout) :: z
+ real(psb_spk_), intent(in) :: alpha, beta, gamma, delta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, nz, i
+ logical :: gpu_done
+
+ info = psb_success_
+ gpu_done = .false.
+
+ select type(xx => x)
+ class is (psb_s_vect_oacc)
+ select type(yy => y)
+ class is (psb_s_vect_oacc)
+ select type(zz => z)
+ class is (psb_s_vect_oacc)
+ if ((beta /= szero) .and. yy%is_host()) call yy%sync_space()
+ if ((delta /= szero) .and. zz%is_host()) call zz%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
+ end select
+ end select
+ end select
+
+ if (.not. gpu_done) then
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
+ end if
+ end subroutine s_oacc_abgdxyz
+
+ subroutine s_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ real(psb_spk_) :: beta
+ class(psb_s_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine s_oacc_sctb_buf
+
+ subroutine s_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ real(psb_spk_) :: beta, x(:)
+ class(psb_s_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 's_oacc_sctb_x')
+ return
+ end select
+
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+
+ call y%set_dev()
+ end subroutine s_oacc_sctb_x
+
+
+
+ subroutine s_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ real(psb_spk_) :: beta, x(:)
+ class(psb_s_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine s_oacc_sctb
+
+
+ subroutine s_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_s_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 's_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine s_oacc_gthzbuf
+
+ subroutine s_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ real(psb_spk_) :: y(:)
+ class(psb_s_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 's_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine s_oacc_gthzv_x
+
+ subroutine s_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_s_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_s_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space(info)
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ x%v(virl%v(i)) = vval%v(i)
+ end do
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space(info)
+ end select
+ select type(vval => val)
+ type is (psb_s_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine s_oacc_ins_v
+
+
+
+ subroutine s_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ real(psb_spk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine s_oacc_ins_a
+
+
+
+ subroutine s_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine s_oacc_bld_mn
+
+
+ subroutine s_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ real(psb_spk_), intent(in) :: this(:)
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 's_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine s_oacc_bld_x
+
+
+ subroutine s_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_s_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_s_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ end if
+ end subroutine s_oacc_asb_m
+
+
+
+ subroutine s_oacc_set_scal(x, val, first, last)
+ class(psb_s_vect_oacc), intent(inout) :: x
+ real(psb_spk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine s_oacc_set_scal
+
+
+
+ subroutine s_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(szero)
+ end subroutine s_oacc_zero
+
+ function s_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function s_oacc_get_nrows
+
+ function s_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "sOACC"
+
+ end function s_oacc_get_fmt
+
+ function s_oacc_vect_dot(n, x, y) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ class(psb_s_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ real(psb_spk_), external :: ddot
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ res = szero
+
+ select type(yy => y)
+ type is (psb_s_base_vect_type)
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, x%v, 1, yy%v, 1)
+ type is (psb_s_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+
+ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
+ do i = 1, n
+ res = res + x%v(i) * yy%v(i)
+ end do
+ !$acc end parallel loop
+
+ class default
+ call x%sync()
+ res = y%dot(n, x%v)
+ end select
+
+ end function s_oacc_vect_dot
+
+
+
+
+ function s_oacc_dot_a(n, x, y) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ real(psb_spk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ real(psb_spk_), external :: ddot
+
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, y, 1, x%v, 1)
+
+ end function s_oacc_dot_a
+
+ ! subroutine s_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_s_vect_oacc), intent(inout) :: x
+ ! real(psb_spk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine s_oacc_set_vect
+
+ subroutine s_oacc_to_dev(v)
+ implicit none
+ real(psb_spk_) :: v(:)
+ !$acc update device(v)
+ end subroutine s_oacc_to_dev
+
+ subroutine s_oacc_to_host(v)
+ implicit none
+ real(psb_spk_) :: v(:)
+ !$acc update self(v)
+ end subroutine s_oacc_to_host
+
+ subroutine s_oacc_sync_space(x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ if (allocated(x%v)) then
+ call s_oacc_create_dev(x%v)
+ end if
+ contains
+ subroutine s_oacc_create_dev(v)
+ implicit none
+ real(psb_spk_) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine s_oacc_create_dev
+ end subroutine s_oacc_sync_space
+
+ subroutine s_oacc_sync(x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ if (x%is_dev()) then
+ call s_oacc_to_host(x%v)
+ end if
+ if (x%is_host()) then
+ call s_oacc_to_dev(x%v)
+ end if
+ call x%set_sync()
+ end subroutine s_oacc_sync
+
+ subroutine s_oacc_set_host(x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+
+ x%state = is_host
+ end subroutine s_oacc_set_host
+
+ subroutine s_oacc_set_dev(x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+
+ x%state = is_dev
+ end subroutine s_oacc_set_dev
+
+ subroutine s_oacc_set_sync(x)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+
+ x%state = is_sync
+ end subroutine s_oacc_set_sync
+
+ function s_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function s_oacc_is_dev
+
+ function s_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function s_oacc_is_host
+
+ function s_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function s_oacc_is_sync
+
+ subroutine s_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_s_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 's_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine s_oacc_vect_all
+
+
+ subroutine s_oacc_vect_free(x, info)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine s_oacc_vect_free
+
+ function s_oacc_get_size(x) result(res)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function s_oacc_get_size
+
+end module psb_s_oacc_vect_mod
diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90
new file mode 100644
index 00000000..7842d96c
--- /dev/null
+++ b/openacc/psb_z_oacc_csr_mat_mod.F90
@@ -0,0 +1,343 @@
+module psb_z_oacc_csr_mat_mod
+
+ use iso_c_binding
+ use psb_z_mat_mod
+ use psb_z_oacc_vect_mod
+ !use oaccsparse_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_z_csr_sparse_mat) :: psb_z_oacc_csr_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => z_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => z_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_z_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_z_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => z_oacc_csr_free
+ procedure, pass(a) :: mold => psb_z_oacc_csr_mold
+ procedure, pass(a) :: all => z_oacc_csr_all
+ procedure, pass(a) :: is_host => z_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => z_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => z_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => z_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => z_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => z_oacc_csr_set_dev
+ procedure, pass(a) :: sync_space => z_oacc_csr_sync_space
+ procedure, pass(a) :: sync => z_oacc_csr_sync
+ end type psb_z_oacc_csr_sparse_mat
+
+ interface
+ module subroutine psb_z_oacc_csr_mold(a,b,info)
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_csr_mold
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_cp_from_fmt(a,b,info)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_csr_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_mv_from_coo(a,b,info)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_csr_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_mv_from_fmt(a,b,info)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_csr_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_csr_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_csr_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ complex(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_csr_csmm
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_csr_csmv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_scals(d, a, info)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_csr_scals
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_scal(d,a,info,side)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_z_oacc_csr_scal
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_reallocate_nz(nz,a)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_z_oacc_csr_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_allocate_mnnz(m,n,a,nz)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_z_oacc_csr_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_csr_cp_from_coo(a,b,info)
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_csr_cp_from_coo
+ end interface
+
+contains
+
+
+ subroutine z_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp)
+ end if
+
+ call a%psb_z_csr_sparse_mat%free()
+
+ return
+ end subroutine z_oacc_csr_free
+
+ function z_oacc_csr_sizeof(a) result(res)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + (2*psb_sizeof_dp) * size(a%val)
+ res = res + psb_sizeof_ip * size(a%irp)
+ res = res + psb_sizeof_ip * size(a%ja)
+
+ end function z_oacc_csr_sizeof
+
+
+ function z_oacc_csr_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'CSR_oacc'
+ end function z_oacc_csr_get_fmt
+
+ subroutine z_oacc_csr_all(m, n, nz, a, info)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n, nz
+ class(psb_z_oacc_csr_sparse_mat), intent(out) :: a
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val) finalize
+ deallocate(a%val, stat=info)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja) finalize
+ deallocate(a%ja, stat=info)
+ end if
+ if (allocated(a%irp)) then
+ !$acc exit data delete(a%irp) finalize
+ deallocate(a%irp, stat=info)
+ end if
+
+ call a%set_nrows(m)
+ call a%set_ncols(n)
+
+ allocate(a%val(nz),stat=info)
+ allocate(a%ja(nz),stat=info)
+ allocate(a%irp(m+1),stat=info)
+ if (info == 0) call a%set_host()
+ if (info == 0) call a%sync_space()
+ end subroutine z_oacc_csr_all
+
+ function z_oacc_csr_is_host(a) result(res)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function z_oacc_csr_is_host
+
+ function z_oacc_csr_is_sync(a) result(res)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function z_oacc_csr_is_sync
+
+ function z_oacc_csr_is_dev(a) result(res)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function z_oacc_csr_is_dev
+
+ subroutine z_oacc_csr_set_host(a)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine z_oacc_csr_set_host
+
+ subroutine z_oacc_csr_set_sync(a)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine z_oacc_csr_set_sync
+
+ subroutine z_oacc_csr_set_dev(a)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine z_oacc_csr_set_dev
+
+ subroutine z_oacc_csr_sync_space(a)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ if (allocated(a%val)) then
+ call z_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irp)) then
+ call i_oacc_create_dev(a%irp)
+ end if
+ contains
+ subroutine z_oacc_create_dev(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine z_oacc_create_dev
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+ end subroutine z_oacc_csr_sync_space
+
+ subroutine z_oacc_csr_sync(a)
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), target, intent(in) :: a
+ class(psb_z_oacc_csr_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call z_oacc_csr_to_host(a%val)
+ call i_oacc_csr_to_host(a%ja)
+ call i_oacc_csr_to_host(a%irp)
+ else if (a%is_host()) then
+ call z_oacc_csr_to_dev(a%val)
+ call i_oacc_csr_to_dev(a%ja)
+ call i_oacc_csr_to_dev(a%irp)
+ end if
+ call tmpa%set_sync()
+ end subroutine z_oacc_csr_sync
+
+ subroutine z_oacc_csr_to_dev(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine z_oacc_csr_to_dev
+
+ subroutine z_oacc_csr_to_host(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine z_oacc_csr_to_host
+
+ subroutine i_oacc_csr_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_csr_to_dev
+
+ subroutine i_oacc_csr_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_csr_to_host
+
+end module psb_z_oacc_csr_mat_mod
+
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
new file mode 100644
index 00000000..5d03b49d
--- /dev/null
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -0,0 +1,935 @@
+module psb_z_oacc_vect_mod
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_z_vect_mod
+ use psb_i_vect_mod
+ use psb_i_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_z_base_vect_type) :: psb_z_vect_oacc
+ integer :: state = is_host
+
+ contains
+ procedure, pass(x) :: get_nrows => z_oacc_get_nrows
+ procedure, nopass :: get_fmt => z_oacc_get_fmt
+
+ procedure, pass(x) :: all => z_oacc_vect_all
+ procedure, pass(x) :: zero => z_oacc_zero
+ procedure, pass(x) :: asb_m => z_oacc_asb_m
+ procedure, pass(x) :: sync => z_oacc_sync
+ procedure, pass(x) :: sync_space => z_oacc_sync_space
+ procedure, pass(x) :: bld_x => z_oacc_bld_x
+ procedure, pass(x) :: bld_mn => z_oacc_bld_mn
+ procedure, pass(x) :: free => z_oacc_vect_free
+ procedure, pass(x) :: ins_a => z_oacc_ins_a
+ procedure, pass(x) :: ins_v => z_oacc_ins_v
+ procedure, pass(x) :: is_host => z_oacc_is_host
+ procedure, pass(x) :: is_dev => z_oacc_is_dev
+ procedure, pass(x) :: is_sync => z_oacc_is_sync
+ procedure, pass(x) :: set_host => z_oacc_set_host
+ procedure, pass(x) :: set_dev => z_oacc_set_dev
+ procedure, pass(x) :: set_sync => z_oacc_set_sync
+ procedure, pass(x) :: set_scal => z_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf
+ procedure, pass(y) :: sctb => z_oacc_sctb
+ procedure, pass(y) :: sctb_x => z_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => z_oacc_get_size
+ procedure, pass(x) :: dot_v => z_oacc_vect_dot
+ procedure, pass(x) :: dot_a => z_oacc_dot_a
+ procedure, pass(y) :: axpby_v => z_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => z_oacc_axpby_a
+ procedure, pass(z) :: abgdxyz => z_oacc_abgdxyz
+ procedure, pass(y) :: mlt_a => z_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => z_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => z_oacc_mlt_v_2
+ procedure, pass(x) :: scal => z_oacc_scal
+ procedure, pass(x) :: nrm2 => z_oacc_nrm2
+ procedure, pass(x) :: amax => z_oacc_amax
+ procedure, pass(x) :: asum => z_oacc_asum
+ procedure, pass(x) :: absval1 => z_oacc_absval1
+ procedure, pass(x) :: absval2 => z_oacc_absval2
+
+ end type psb_z_vect_oacc
+
+ interface
+ subroutine z_oacc_mlt_v(x, y, info)
+ import
+ implicit none
+ class(psb_z_base_vect_type), intent(inout) :: x
+ class(psb_z_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine z_oacc_mlt_v
+ end interface
+
+
+ interface
+ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ import
+ implicit none
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x
+ class(psb_z_base_vect_type), intent(inout) :: y
+ class(psb_z_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ character(len=1), intent(in), optional :: conjgx, conjgy
+ end subroutine z_oacc_mlt_v_2
+ end interface
+
+contains
+
+ subroutine z_oacc_absval1(x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: n, i
+
+ if (x%is_host()) call x%sync_space()
+ n = size(x%v)
+ !$acc parallel loop
+ do i = 1, n
+ x%v(i) = abs(x%v(i))
+ end do
+ call x%set_dev()
+ end subroutine z_oacc_absval1
+
+ subroutine z_oacc_absval2(x, y)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ class(psb_z_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+
+ n = min(size(x%v), size(y%v))
+ select type (yy => y)
+ class is (psb_z_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+ !$acc parallel loop
+ do i = 1, n
+ yy%v(i) = abs(x%v(i))
+ end do
+ class default
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call x%psb_z_base_vect_type%absval(y)
+ end select
+ end subroutine z_oacc_absval2
+
+ subroutine z_oacc_scal(alpha, x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ complex(psb_dpk_), intent(in) :: alpha
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x%v)
+ x%v(i) = alpha * x%v(i)
+ end do
+ call x%set_dev()
+ end subroutine z_oacc_scal
+
+ function z_oacc_nrm2(n, x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))**2
+ end do
+ res = sqrt(sum)
+ end function z_oacc_nrm2
+
+ function z_oacc_amax(n, x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ real(psb_dpk_) :: max_val
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
+ end do
+ res = max_val
+ end function z_oacc_amax
+
+ function z_oacc_asum(n, x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: info
+ complex(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+
+ if (x%is_host()) call x%sync_space()
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x%v(i))
+ end do
+ res = sum
+ end function z_oacc_asum
+
+
+ subroutine z_oacc_mlt_a(x, y, info)
+ implicit none
+ complex(psb_dpk_), intent(in) :: x(:)
+ class(psb_z_vect_oacc), intent(inout) :: y
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (y%is_dev()) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ y%v(i) = y%v(i) * x(i)
+ end do
+ call y%set_host()
+ end subroutine z_oacc_mlt_a
+
+ subroutine z_oacc_mlt_a_2(alpha, x, y, beta, z, info)
+ implicit none
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:)
+ complex(psb_dpk_), intent(in) :: y(:)
+ class(psb_z_vect_oacc), intent(inout) :: z
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, n
+
+ info = 0
+ if (z%is_dev()) call z%sync_space()
+ !$acc parallel loop
+ do i = 1, size(x)
+ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
+ end do
+ call z%set_host()
+ end subroutine z_oacc_mlt_a_2
+
+
+!!$ subroutine z_oacc_mlt_v(x, y, info)
+!!$ implicit none
+!!$ class(psb_z_base_vect_type), intent(inout) :: x
+!!$ class(psb_z_vect_oacc), intent(inout) :: y
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$
+!!$ integer(psb_ipk_) :: i, n
+!!$
+!!$ info = 0
+!!$ n = min(x%get_nrows(), y%get_nrows())
+!!$ select type(xx => x)
+!!$ type is (psb_z_base_vect_type)
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
+!!$ call y%set_host()
+!!$ end select
+!!$ end subroutine z_oacc_mlt_v
+!!$
+!!$ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+!!$ use psi_serial_mod
+!!$ use psb_string_mod
+!!$ implicit none
+!!$ complex(psb_dpk_), intent(in) :: alpha, beta
+!!$ class(psb_z_base_vect_type), intent(inout) :: x
+!!$ class(psb_z_base_vect_type), intent(inout) :: y
+!!$ class(psb_z_vect_oacc), intent(inout) :: z
+!!$ integer(psb_ipk_), intent(out) :: info
+!!$ character(len=1), intent(in), optional :: conjgx, conjgy
+!!$ integer(psb_ipk_) :: i, n
+!!$ logical :: conjgx_, conjgy_
+!!$
+!!$ conjgx_ = .false.
+!!$ conjgy_ = .false.
+!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
+!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
+!!$
+!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
+!!$
+!!$ info = 0
+!!$ select type(xx => x)
+!!$ class is (psb_z_vect_oacc)
+!!$ select type (yy => y)
+!!$ class is (psb_z_vect_oacc)
+!!$ if (xx%is_host()) call xx%sync_space()
+!!$ if (yy%is_host()) call yy%sync_space()
+!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_dev()
+!!$ class default
+!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (yy%is_dev()) call yy%sync()
+!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ class default
+!!$ if (x%is_dev()) call x%sync()
+!!$ if (y%is_dev()) call y%sync()
+!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space()
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
+!!$ end do
+!!$ call z%set_host()
+!!$ end select
+!!$ end subroutine z_oacc_mlt_v_2
+
+
+ subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_z_base_vect_type), intent(inout) :: x
+ class(psb_z_vect_oacc), intent(inout) :: y
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, i
+
+ info = psb_success_
+
+ select type(xx => x)
+ type is (psb_z_vect_oacc)
+ if ((beta /= zzero) .and. y%is_host()) call y%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(y%v)
+ if ((nx < m) .or. (ny < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * xx%v(i) + beta * y%v(i)
+ end do
+ end if
+ call y%set_dev()
+ class default
+ if ((alpha /= zzero) .and. (x%is_dev())) call x%sync()
+ call y%axpby(m, alpha, x%v, beta, info)
+ end select
+ end subroutine z_oacc_axpby_v
+
+ subroutine z_oacc_axpby_a(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ complex(psb_dpk_), intent(in) :: x(:)
+ class(psb_z_vect_oacc), intent(inout) :: y
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i
+
+ if ((beta /= zzero) .and. (y%is_dev())) call y%sync_space()
+ !$acc parallel loop
+ do i = 1, m
+ y%v(i) = alpha * x(i) + beta * y%v(i)
+ end do
+ call y%set_host()
+ end subroutine z_oacc_axpby_a
+
+ subroutine z_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ class(psb_z_base_vect_type), intent(inout) :: x
+ class(psb_z_base_vect_type), intent(inout) :: y
+ class(psb_z_vect_oacc), intent(inout) :: z
+ complex(psb_dpk_), intent(in) :: alpha, beta, gamma, delta
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: nx, ny, nz, i
+ logical :: gpu_done
+
+ info = psb_success_
+ gpu_done = .false.
+
+ select type(xx => x)
+ class is (psb_z_vect_oacc)
+ select type(yy => y)
+ class is (psb_z_vect_oacc)
+ select type(zz => z)
+ class is (psb_z_vect_oacc)
+ if ((beta /= zzero) .and. yy%is_host()) call yy%sync_space()
+ if ((delta /= zzero) .and. zz%is_host()) call zz%sync_space()
+ if (xx%is_host()) call xx%sync_space()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
+ end select
+ end select
+ end select
+
+ if (.not. gpu_done) then
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
+ end if
+ end subroutine z_oacc_abgdxyz
+
+ subroutine z_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ complex(psb_dpk_) :: beta
+ class(psb_z_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine z_oacc_sctb_buf
+
+ subroutine z_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ complex(psb_dpk_) :: beta, x(:)
+ class(psb_z_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'z_oacc_sctb_x')
+ return
+ end select
+
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+
+ call y%set_dev()
+ end subroutine z_oacc_sctb_x
+
+
+
+ subroutine z_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ complex(psb_dpk_) :: beta, x(:)
+ class(psb_z_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine z_oacc_sctb
+
+
+ subroutine z_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_z_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'z_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine z_oacc_gthzbuf
+
+ subroutine z_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ complex(psb_dpk_) :: y(:)
+ class(psb_z_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space(info)
+ class default
+ call psb_errpush(info, 'z_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine z_oacc_gthzv_x
+
+ subroutine z_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_z_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_z_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space(info)
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ x%v(virl%v(i)) = vval%v(i)
+ end do
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space(info)
+ end select
+ select type(vval => val)
+ type is (psb_z_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine z_oacc_ins_v
+
+
+
+ subroutine z_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ complex(psb_dpk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine z_oacc_ins_a
+
+
+
+ subroutine z_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine z_oacc_bld_mn
+
+
+ subroutine z_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ complex(psb_dpk_), intent(in) :: this(:)
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'z_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine z_oacc_bld_x
+
+
+ subroutine z_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_z_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_z_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ end if
+ end subroutine z_oacc_asb_m
+
+
+
+ subroutine z_oacc_set_scal(x, val, first, last)
+ class(psb_z_vect_oacc), intent(inout) :: x
+ complex(psb_dpk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine z_oacc_set_scal
+
+
+
+ subroutine z_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(zzero)
+ end subroutine z_oacc_zero
+
+ function z_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function z_oacc_get_nrows
+
+ function z_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "zOACC"
+
+ end function z_oacc_get_fmt
+
+ function z_oacc_vect_dot(n, x, y) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ class(psb_z_base_vect_type), intent(inout) :: y
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_dpk_) :: res
+ complex(psb_dpk_), external :: ddot
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ res = zzero
+
+ select type(yy => y)
+ type is (psb_z_base_vect_type)
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, x%v, 1, yy%v, 1)
+ type is (psb_z_vect_oacc)
+ if (x%is_host()) call x%sync()
+ if (yy%is_host()) call yy%sync()
+
+ !$acc parallel loop reduction(+:res) present(x%v, yy%v)
+ do i = 1, n
+ res = res + x%v(i) * yy%v(i)
+ end do
+ !$acc end parallel loop
+
+ class default
+ call x%sync()
+ res = y%dot(n, x%v)
+ end select
+
+ end function z_oacc_vect_dot
+
+
+
+
+ function z_oacc_dot_a(n, x, y) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ complex(psb_dpk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_dpk_) :: res
+ complex(psb_dpk_), external :: ddot
+
+ if (x%is_dev()) call x%sync()
+ res = ddot(n, y, 1, x%v, 1)
+
+ end function z_oacc_dot_a
+
+ ! subroutine z_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_z_vect_oacc), intent(inout) :: x
+ ! complex(psb_dpk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine z_oacc_set_vect
+
+ subroutine z_oacc_to_dev(v)
+ implicit none
+ complex(psb_dpk_) :: v(:)
+ !$acc update device(v)
+ end subroutine z_oacc_to_dev
+
+ subroutine z_oacc_to_host(v)
+ implicit none
+ complex(psb_dpk_) :: v(:)
+ !$acc update self(v)
+ end subroutine z_oacc_to_host
+
+ subroutine z_oacc_sync_space(x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ if (allocated(x%v)) then
+ call z_oacc_create_dev(x%v)
+ end if
+ contains
+ subroutine z_oacc_create_dev(v)
+ implicit none
+ complex(psb_dpk_) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine z_oacc_create_dev
+ end subroutine z_oacc_sync_space
+
+ subroutine z_oacc_sync(x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ if (x%is_dev()) then
+ call z_oacc_to_host(x%v)
+ end if
+ if (x%is_host()) then
+ call z_oacc_to_dev(x%v)
+ end if
+ call x%set_sync()
+ end subroutine z_oacc_sync
+
+ subroutine z_oacc_set_host(x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+
+ x%state = is_host
+ end subroutine z_oacc_set_host
+
+ subroutine z_oacc_set_dev(x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+
+ x%state = is_dev
+ end subroutine z_oacc_set_dev
+
+ subroutine z_oacc_set_sync(x)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+
+ x%state = is_sync
+ end subroutine z_oacc_set_sync
+
+ function z_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function z_oacc_is_dev
+
+ function z_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function z_oacc_is_host
+
+ function z_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function z_oacc_is_sync
+
+ subroutine z_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_z_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'z_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine z_oacc_vect_all
+
+
+ subroutine z_oacc_vect_free(x, info)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine z_oacc_vect_free
+
+ function z_oacc_get_size(x) result(res)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function z_oacc_get_size
+
+end module psb_z_oacc_vect_mod
From 686bac42249fa58fc56c42dccc6b2b1b0e5a069c Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 17 Jul 2024 13:03:46 +0200
Subject: [PATCH 15/86] Account for S/D/C/Z variants
---
openacc/impl/Makefile | 68 +++++++++++++++++++++++++++++++++++-----
openacc/psb_oacc_mod.F90 | 12 +++++--
2 files changed, 71 insertions(+), 9 deletions(-)
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index 56df9402..32f104dd 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -10,13 +10,67 @@ UP=..
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
LIBNAME=libpsb_openacc.a
-OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
- psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \
- psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
- psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
- psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
- psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o \
- psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o
+OBJS= \
+psb_s_oacc_csr_vect_mv.o \
+psb_s_oacc_csr_inner_vect_sv.o \
+psb_s_oacc_csr_csmm.o \
+psb_s_oacc_csr_csmv.o \
+psb_s_oacc_csr_scals.o \
+psb_s_oacc_csr_scal.o \
+psb_s_oacc_csr_allocate_mnnz.o \
+psb_s_oacc_csr_reallocate_nz.o \
+psb_s_oacc_csr_cp_from_coo.o \
+psb_s_oacc_csr_cp_from_fmt.o \
+psb_s_oacc_csr_mv_from_coo.o \
+psb_s_oacc_csr_mv_from_fmt.o \
+psb_s_oacc_csr_mold.o \
+psb_s_oacc_mlt_v_2.o \
+psb_s_oacc_mlt_v.o \
+psb_d_oacc_csr_vect_mv.o \
+psb_d_oacc_csr_inner_vect_sv.o \
+psb_d_oacc_csr_csmm.o \
+psb_d_oacc_csr_csmv.o \
+psb_d_oacc_csr_scals.o \
+psb_d_oacc_csr_scal.o \
+psb_d_oacc_csr_allocate_mnnz.o \
+psb_d_oacc_csr_reallocate_nz.o \
+psb_d_oacc_csr_cp_from_coo.o \
+psb_d_oacc_csr_cp_from_fmt.o \
+psb_d_oacc_csr_mv_from_coo.o \
+psb_d_oacc_csr_mv_from_fmt.o \
+psb_d_oacc_csr_mold.o \
+psb_d_oacc_mlt_v_2.o \
+psb_d_oacc_mlt_v.o \
+psb_c_oacc_csr_vect_mv.o \
+psb_c_oacc_csr_inner_vect_sv.o \
+psb_c_oacc_csr_csmm.o \
+psb_c_oacc_csr_csmv.o \
+psb_c_oacc_csr_scals.o \
+psb_c_oacc_csr_scal.o \
+psb_c_oacc_csr_allocate_mnnz.o \
+psb_c_oacc_csr_reallocate_nz.o \
+psb_c_oacc_csr_cp_from_coo.o \
+psb_c_oacc_csr_cp_from_fmt.o \
+psb_c_oacc_csr_mv_from_coo.o \
+psb_c_oacc_csr_mv_from_fmt.o \
+psb_c_oacc_csr_mold.o \
+psb_c_oacc_mlt_v_2.o \
+psb_c_oacc_mlt_v.o \
+psb_z_oacc_csr_vect_mv.o \
+psb_z_oacc_csr_inner_vect_sv.o \
+psb_z_oacc_csr_csmm.o \
+psb_z_oacc_csr_csmv.o \
+psb_z_oacc_csr_scals.o \
+psb_z_oacc_csr_scal.o \
+psb_z_oacc_csr_allocate_mnnz.o \
+psb_z_oacc_csr_reallocate_nz.o \
+psb_z_oacc_csr_cp_from_coo.o \
+psb_z_oacc_csr_cp_from_fmt.o \
+psb_z_oacc_csr_mv_from_coo.o \
+psb_z_oacc_csr_mv_from_fmt.o \
+psb_z_oacc_csr_mold.o \
+psb_z_oacc_mlt_v_2.o \
+psb_z_oacc_mlt_v.o
objs: $(OBJS)
diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90
index fe827db8..2d8e8b40 100644
--- a/openacc/psb_oacc_mod.F90
+++ b/openacc/psb_oacc_mod.F90
@@ -3,7 +3,15 @@ module psb_oacc_mod
use psb_oacc_env_mod
+ use psb_i_oacc_vect_mod
+ use psb_s_oacc_vect_mod
use psb_d_oacc_vect_mod
- use psb_d_oacc_csr_mat_mod
+ use psb_c_oacc_vect_mod
+ use psb_z_oacc_vect_mod
-end module psb_oacc_mod
\ No newline at end of file
+ use psb_s_oacc_csr_mat_mod
+ use psb_d_oacc_csr_mat_mod
+ use psb_c_oacc_csr_mat_mod
+ use psb_z_oacc_csr_mat_mod
+
+end module psb_oacc_mod
From 9e18545151c8a2d4f944ffbd01bd4848c6b36c2e Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 17 Jul 2024 13:04:07 +0200
Subject: [PATCH 16/86] Fix typos
---
openacc/impl/psb_c_oacc_csr_csmm.F90 | 2 +-
openacc/impl/psb_c_oacc_csr_csmv.F90 | 2 +-
openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 | 2 +-
openacc/impl/psb_c_oacc_csr_mold.F90 | 2 +-
openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 2 +-
openacc/impl/psb_s_oacc_csr_csmm.F90 | 2 +-
openacc/impl/psb_s_oacc_csr_csmv.F90 | 2 +-
openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 | 2 +-
openacc/impl/psb_s_oacc_csr_mold.F90 | 2 +-
openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 2 +-
openacc/impl/psb_z_oacc_csr_csmm.F90 | 2 +-
openacc/impl/psb_z_oacc_csr_csmv.F90 | 2 +-
openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 | 2 +-
openacc/impl/psb_z_oacc_csr_mold.F90 | 2 +-
openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 2 +-
15 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/openacc/impl/psb_c_oacc_csr_csmm.F90 b/openacc/impl/psb_c_oacc_csr_csmm.F90
index c26df410..5bf0bad4 100644
--- a/openacc/impl/psb_c_oacc_csr_csmm.F90
+++ b/openacc/impl/psb_c_oacc_csr_csmm.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invalic_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_c_oacc_csr_csmv.F90 b/openacc/impl/psb_c_oacc_csr_csmv.F90
index 8f37efb3..e5d5f24e 100644
--- a/openacc/impl/psb_c_oacc_csr_csmv.F90
+++ b/openacc/impl/psb_c_oacc_csr_csmv.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invalic_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
index 2d733f48..1dee9f2e 100644
--- a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invalic_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_c_oacc_csr_mold.F90 b/openacc/impl/psb_c_oacc_csr_mold.F90
index 6ee36985..cf32ea94 100644
--- a/openacc/impl/psb_c_oacc_csr_mold.F90
+++ b/openacc/impl/psb_c_oacc_csr_mold.F90
@@ -31,5 +31,5 @@ contains
return
end subroutine psb_c_oacc_csr_mold
-end submodule psb_c_oacc_csr_molc_impl
+end submodule psb_c_oacc_csr_mold_impl
diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
index b4b79d56..0fd1ed35 100644
--- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
- info = psb_err_invalic_mat_state_
+ info = psb_err_invalid_mat_state_
return
end if
diff --git a/openacc/impl/psb_s_oacc_csr_csmm.F90 b/openacc/impl/psb_s_oacc_csr_csmm.F90
index 2e7def53..bb8283bf 100644
--- a/openacc/impl/psb_s_oacc_csr_csmm.F90
+++ b/openacc/impl/psb_s_oacc_csr_csmm.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invalis_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_s_oacc_csr_csmv.F90 b/openacc/impl/psb_s_oacc_csr_csmv.F90
index ba673941..c224dc0e 100644
--- a/openacc/impl/psb_s_oacc_csr_csmv.F90
+++ b/openacc/impl/psb_s_oacc_csr_csmv.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invalis_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
index 7af897a7..ec7f4bad 100644
--- a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invalis_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_s_oacc_csr_mold.F90 b/openacc/impl/psb_s_oacc_csr_mold.F90
index a85471e5..95bddde8 100644
--- a/openacc/impl/psb_s_oacc_csr_mold.F90
+++ b/openacc/impl/psb_s_oacc_csr_mold.F90
@@ -31,5 +31,5 @@ contains
return
end subroutine psb_s_oacc_csr_mold
-end submodule psb_s_oacc_csr_mols_impl
+end submodule psb_s_oacc_csr_mold_impl
diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
index 9b15da3b..c2bbd6b1 100644
--- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
- info = psb_err_invalis_mat_state_
+ info = psb_err_invalid_mat_state_
return
end if
diff --git a/openacc/impl/psb_z_oacc_csr_csmm.F90 b/openacc/impl/psb_z_oacc_csr_csmm.F90
index aeaaab33..97a38deb 100644
--- a/openacc/impl/psb_z_oacc_csr_csmm.F90
+++ b/openacc/impl/psb_z_oacc_csr_csmm.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invaliz_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_z_oacc_csr_csmv.F90 b/openacc/impl/psb_z_oacc_csr_csmv.F90
index f5501b21..8def3c76 100644
--- a/openacc/impl/psb_z_oacc_csr_csmv.F90
+++ b/openacc/impl/psb_z_oacc_csr_csmv.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invaliz_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
index b5d552d3..4975b276 100644
--- a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
@@ -27,7 +27,7 @@ contains
end if
if (.not.a%is_asb()) then
- info = psb_err_invaliz_mat_state_
+ info = psb_err_invalid_mat_state_
call psb_errpush(info, name)
goto 9999
endif
diff --git a/openacc/impl/psb_z_oacc_csr_mold.F90 b/openacc/impl/psb_z_oacc_csr_mold.F90
index e7e9e8b9..93b6bb17 100644
--- a/openacc/impl/psb_z_oacc_csr_mold.F90
+++ b/openacc/impl/psb_z_oacc_csr_mold.F90
@@ -31,5 +31,5 @@ contains
return
end subroutine psb_z_oacc_csr_mold
-end submodule psb_z_oacc_csr_molz_impl
+end submodule psb_z_oacc_csr_mold_impl
diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
index 437dd70a..b8da5c8f 100644
--- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
if ((n /= size(x%v)) .or. (n /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
- info = psb_err_invaliz_mat_state_
+ info = psb_err_invalid_mat_state_
return
end if
From b5a8c549dd7f78775c12db811aae243b6658167a Mon Sep 17 00:00:00 2001
From: tloloum
Date: Fri, 19 Jul 2024 11:35:11 +0200
Subject: [PATCH 17/86] psb_d_oacc_pde3d draft
---
test/openacc/Makefile | 4 +-
test/openacc/test.F90 | 617 -----------------
test/pargen/Makefile | 8 +-
test/pargen/psb_d_oacc_pde3d.F90 | 1073 ++++++++++++++++++++++++++++++
4 files changed, 1081 insertions(+), 621 deletions(-)
delete mode 100644 test/openacc/test.F90
create mode 100644 test/pargen/psb_d_oacc_pde3d.F90
diff --git a/test/openacc/Makefile b/test/openacc/Makefile
index 65bed1ad..9d62f3cd 100644
--- a/test/openacc/Makefile
+++ b/test/openacc/Makefile
@@ -9,7 +9,7 @@ INCDIR=$(TOPDIR)/include
MODDIR=$(TOPDIR)/modules
EXEDIR=./runs
-PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lopenblas -lmetis
+PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lpsb_krylov -lpsb_prec -lopenblas -lmetis
LDLIBS=$(PSBGPULDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
@@ -17,7 +17,7 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR)
FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70"
CFLAGS=-O0 -march=native
-SRCS=test.F90 vectoacc.F90 datavect.F90
+SRCS=vectoacc.F90 datavect.F90
CSRC=timers.c
OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o)
diff --git a/test/openacc/test.F90 b/test/openacc/test.F90
deleted file mode 100644
index 0d0b756f..00000000
--- a/test/openacc/test.F90
+++ /dev/null
@@ -1,617 +0,0 @@
-module psb_d_pde3d_mod
-
-
- use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
- & psb_dspmat_type, psb_d_vect_type, dzero,&
- & psb_d_base_sparse_mat, psb_d_base_vect_type, &
- & psb_i_base_vect_type, psb_l_base_vect_type
-
- interface
- function d_func_3d(x,y,z) result(val)
- import :: psb_dpk_
- real(psb_dpk_), intent(in) :: x,y,z
- real(psb_dpk_) :: val
- end function d_func_3d
- end interface
-
- interface psb_gen_pde3d
- module procedure psb_d_gen_pde3d
- end interface psb_gen_pde3d
-
- contains
-
- function d_null_func_3d(x,y,z) result(val)
-
- real(psb_dpk_), intent(in) :: x,y,z
- real(psb_dpk_) :: val
-
- val = dzero
-
- end function d_null_func_3d
- !
- ! functions parametrizing the differential equation
- !
-
- !
- ! Note: b1, b2 and b3 are the coefficients of the first
- ! derivative of the unknown function. The default
- ! we apply here is to have them zero, so that the resulting
- ! matrix is symmetric/hermitian and suitable for
- ! testing with CG and FCG.
- ! When testing methods for non-hermitian matrices you can
- ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done))
- !
- function b1(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: b1
- real(psb_dpk_), intent(in) :: x,y,z
- b1=done/sqrt((3*done))
- end function b1
- function b2(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: b2
- real(psb_dpk_), intent(in) :: x,y,z
- b2=done/sqrt((3*done))
- end function b2
- function b3(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: b3
- real(psb_dpk_), intent(in) :: x,y,z
- b3=done/sqrt((3*done))
- end function b3
- function c(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: c
- real(psb_dpk_), intent(in) :: x,y,z
- c=dzero
- end function c
- function a1(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: a1
- real(psb_dpk_), intent(in) :: x,y,z
- a1=done/80
- end function a1
- function a2(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: a2
- real(psb_dpk_), intent(in) :: x,y,z
- a2=done/80
- end function a2
- function a3(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: a3
- real(psb_dpk_), intent(in) :: x,y,z
- a3=done/80
- end function a3
- function g(x,y,z)
- use psb_base_mod, only : psb_dpk_, done, dzero
- implicit none
- real(psb_dpk_) :: g
- real(psb_dpk_), intent(in) :: x,y,z
- g = dzero
- if (x == done) then
- g = done
- else if (x == dzero) then
- g = exp(y**2-z**2)
- end if
- end function g
-
-
- !
- ! subroutine to allocate and fill in the coefficient matrix and
- ! the rhs.
- !
- subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
- & f,amold,vmold,imold,partition,nrl,iv,tnd)
- use psb_base_mod
- use psb_util_mod
- !
- ! Discretizes the partial differential equation
- !
- ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u)
- ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f
- ! dxdx dydy dzdz dx dy dz
- !
- ! with Dirichlet boundary conditions
- ! u = g
- !
- ! on the unit cube 0<=x,y,z<=1.
- !
- !
- ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
- !
- implicit none
- integer(psb_ipk_) :: idim
- type(psb_dspmat_type) :: a
- type(psb_d_vect_type) :: xv,bv
- type(psb_desc_type) :: desc_a
- type(psb_ctxt_type) :: ctxt
- integer(psb_ipk_) :: info
- character(len=*) :: afmt
- procedure(d_func_3d), optional :: f
- class(psb_d_base_sparse_mat), optional :: amold
- class(psb_d_base_vect_type), optional :: vmold
- class(psb_i_base_vect_type), optional :: imold
- integer(psb_ipk_), optional :: partition, nrl,iv(:)
- logical, optional :: tnd
- ! Local variables.
-
- integer(psb_ipk_), parameter :: nb=20
- type(psb_d_csc_sparse_mat) :: acsc
- type(psb_d_coo_sparse_mat) :: acoo
- type(psb_d_csr_sparse_mat) :: acsr
- real(psb_dpk_) :: zt(nb),x,y,z
- integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_
- integer(psb_lpk_) :: m,n,glob_row,nt
- integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
- ! For 3D partition
- ! Note: integer control variables going directly into an MPI call
- ! must be 4 bytes, i.e. psb_mpk_
- integer(psb_mpk_) :: npdims(3), npp, minfo
- integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
- integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
- ! Process grid
- integer(psb_ipk_) :: np, iam
- integer(psb_ipk_) :: icoeff
- integer(psb_lpk_), allocatable :: irow(:),icol(:),myidx(:)
- real(psb_dpk_), allocatable :: val(:)
- ! deltah dimension of each grid cell
- ! deltat discretization time
- real(psb_dpk_) :: deltah, sqdeltah, deltah2
- real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero
- real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb
- integer(psb_ipk_) :: err_act
- procedure(d_func_3d), pointer :: f_
- logical :: tnd_
- character(len=20) :: name, ch_err,tmpfmt
-
- info = psb_success_
- name = 'create_matrix'
- call psb_erractionsave(err_act)
-
- call psb_info(ctxt, iam, np)
-
-
- if (present(f)) then
- f_ => f
- else
- f_ => d_null_func_3d
- end if
-
- deltah = done/(idim+2)
- sqdeltah = deltah*deltah
- deltah2 = (2*done)* deltah
-
- if (present(partition)) then
- if ((1<= partition).and.(partition <= 3)) then
- partition_ = partition
- else
- write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
- partition_ = 3
- end if
- else
- partition_ = 3
- end if
-
- ! initialize array descriptor and sparse matrix storage. provide an
- ! estimate of the number of non zeroes
-
- m = (1_psb_lpk_*idim)*idim*idim
- n = m
- nnz = ((n*7)/(np))
- if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
- t0 = psb_wtime()
- select case(partition_)
- case(1)
- ! A BLOCK partition
- if (present(nrl)) then
- nr = nrl
- else
- !
- ! Using a simple BLOCK distribution.
- !
- nt = (m+np-1)/np
- nr = max(0,min(nt,m-(iam*nt)))
- end if
-
- nt = nr
- call psb_sum(ctxt,nt)
- if (nt /= m) then
- write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
- info = -1
- call psb_barrier(ctxt)
- call psb_abort(ctxt)
- return
- end if
-
- !
- ! First example of use of CDALL: specify for each process a number of
- ! contiguous rows
- !
- call psb_cdall(ctxt,desc_a,info,nl=nr)
- myidx = desc_a%get_global_indices()
- nlr = size(myidx)
-
- case(2)
- ! A partition defined by the user through IV
-
- if (present(iv)) then
- if (size(iv) /= m) then
- write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
- info = -1
- call psb_barrier(ctxt)
- call psb_abort(ctxt)
- return
- end if
- else
- write(psb_err_unit,*) iam, 'Initialization error: IV not present'
- info = -1
- call psb_barrier(ctxt)
- call psb_abort(ctxt)
- return
- end if
-
- !
- ! Second example of use of CDALL: specify for each row the
- ! process that owns it
- !
- call psb_cdall(ctxt,desc_a,info,vg=iv)
- myidx = desc_a%get_global_indices()
- nlr = size(myidx)
-
- case(3)
- ! A 3-dimensional partition
-
- ! A nifty MPI function will split the process list
- npdims = 0
- call mpi_dims_create(np,3,npdims,info)
- npx = npdims(1)
- npy = npdims(2)
- npz = npdims(3)
-
- allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
- ! We can reuse idx2ijk for process indices as well.
- call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
- ! Now let's split the 3D cube in hexahedra
- call dist1Didx(bndx,idim,npx)
- mynx = bndx(iamx+1)-bndx(iamx)
- call dist1Didx(bndy,idim,npy)
- myny = bndy(iamy+1)-bndy(iamy)
- call dist1Didx(bndz,idim,npz)
- mynz = bndz(iamz+1)-bndz(iamz)
-
- ! How many indices do I own?
- nlr = mynx*myny*mynz
- allocate(myidx(nlr))
- ! Now, let's generate the list of indices I own
- nr = 0
- do i=bndx(iamx),bndx(iamx+1)-1
- do j=bndy(iamy),bndy(iamy+1)-1
- do k=bndz(iamz),bndz(iamz+1)-1
- nr = nr + 1
- call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
- end do
- end do
- end do
- if (nr /= nlr) then
- write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
- & nr,nlr,mynx,myny,mynz
- info = -1
- call psb_barrier(ctxt)
- call psb_abort(ctxt)
- end if
-
- !
- ! Third example of use of CDALL: specify for each process
- ! the set of global indices it owns.
- !
- call psb_cdall(ctxt,desc_a,info,vl=myidx)
-
- case default
- write(psb_err_unit,*) iam, 'Initialization error: should not get here'
- info = -1
- call psb_barrier(ctxt)
- call psb_abort(ctxt)
- return
- end select
-
-
- if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz,&
- & dupl=psb_dupl_err_)
- ! define rhs from boundary conditions; also build initial guess
- if (info == psb_success_) call psb_geall(xv,desc_a,info)
- if (info == psb_success_) call psb_geall(bv,desc_a,info)
-
- call psb_barrier(ctxt)
- talc = psb_wtime()-t0
-
- if (info /= psb_success_) then
- info=psb_err_from_subroutine_
- ch_err='allocation rout.'
- call psb_errpush(info,name,a_err=ch_err)
- goto 9999
- end if
-
- ! we build an auxiliary matrix consisting of one row at a
- ! time; just a small matrix. might be extended to generate
- ! a bunch of rows per call.
- !
- allocate(val(20*nb),irow(20*nb),&
- &icol(20*nb),stat=info)
- if (info /= psb_success_ ) then
- info=psb_err_alloc_dealloc_
- call psb_errpush(info,name)
- goto 9999
- endif
-
-
- ! loop over rows belonging to current process in a block
- ! distribution.
-
- call psb_barrier(ctxt)
- t1 = psb_wtime()
- do ii=1, nlr,nb
- ib = min(nb,nlr-ii+1)
- icoeff = 1
- do k=1,ib
- i=ii+k-1
- ! local matrix pointer
- glob_row=myidx(i)
- ! compute gridpoint coordinates
- call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
- ! x, y, z coordinates
- x = (ix-1)*deltah
- y = (iy-1)*deltah
- z = (iz-1)*deltah
- zt(k) = f_(x,y,z)
- ! internal point: build discretization
- !
- ! term depending on (x-1,y,z)
- !
- val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
- if (ix == 1) then
- zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
- else
- call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- endif
- ! term depending on (x,y-1,z)
- val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
- if (iy == 1) then
- zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
- else
- call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- endif
- ! term depending on (x,y,z-1)
- val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
- if (iz == 1) then
- zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
- else
- call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- endif
-
- ! term depending on (x,y,z)
- val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
- & + c(x,y,z)
- call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- ! term depending on (x,y,z+1)
- val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
- if (iz == idim) then
- zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
- else
- call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- endif
- ! term depending on (x,y+1,z)
- val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
- if (iy == idim) then
- zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
- else
- call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- endif
- ! term depending on (x+1,y,z)
- val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
- if (ix==idim) then
- zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
- else
- call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
- irow(icoeff) = glob_row
- icoeff = icoeff+1
- endif
-
- end do
- call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
- if(info /= psb_success_) exit
- call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
- if(info /= psb_success_) exit
- zt(:)=dzero
- call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
- if(info /= psb_success_) exit
- end do
-
- tgen = psb_wtime()-t1
- if(info /= psb_success_) then
- info=psb_err_from_subroutine_
- ch_err='insert rout.'
- call psb_errpush(info,name,a_err=ch_err)
- goto 9999
- end if
-
- deallocate(val,irow,icol)
-
- call psb_barrier(ctxt)
- t1 = psb_wtime()
- call psb_cdasb(desc_a,info,mold=imold)
- tcdasb = psb_wtime()-t1
- call psb_barrier(ctxt)
- t1 = psb_wtime()
- if (info == psb_success_) then
- if (present(amold)) then
- call psb_spasb(a,desc_a,info,mold=amold,bld_and=tnd)
- else
- call psb_spasb(a,desc_a,info,afmt=afmt,bld_and=tnd)
- end if
- end if
- call psb_barrier(ctxt)
- if(info /= psb_success_) then
- info=psb_err_from_subroutine_
- ch_err='asb rout.'
- call psb_errpush(info,name,a_err=ch_err)
- goto 9999
- end if
- if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
- if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
- if(info /= psb_success_) then
- info=psb_err_from_subroutine_
- ch_err='asb rout.'
- call psb_errpush(info,name,a_err=ch_err)
- goto 9999
- end if
- tasb = psb_wtime()-t1
- call psb_barrier(ctxt)
- ttot = psb_wtime() - t0
-
- call psb_amx(ctxt,talc)
- call psb_amx(ctxt,tgen)
- call psb_amx(ctxt,tasb)
- call psb_amx(ctxt,ttot)
- if(iam == psb_root_) then
- tmpfmt = a%get_fmt()
- write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
- & tmpfmt
- write(psb_out_unit,'("-allocation time : ",es12.5)') talc
- write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
- write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
- write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
- write(psb_out_unit,'("-total time : ",es12.5)') ttot
-
- end if
- call psb_erractionrestore(err_act)
- return
-
- 9999 call psb_error_handler(ctxt,err_act)
-
- return
- end subroutine psb_d_gen_pde3d
-
-
-end module psb_d_pde3d_mod
-
-
-
-program test
- use psb_base_mod
- use psb_ext_mod
- use psb_oacc_mod
- use psb_d_pde3d_mod
-
- implicit none
- integer(psb_ipk_) :: n, i, info, m, nrm, nz
- integer(psb_ipk_), parameter :: ntests=80, ngpu=20
- real(psb_dpk_) :: dot_dev, dot_host
- type(psb_d_vect_oacc) :: tx, ty
- type(psb_d_oacc_csr_sparse_mat) :: aacsr
- real(psb_dpk_) :: t0, t1, t2, t3, csflp, elflp
- double precision, external :: etime
-
- type(psb_dspmat_type) :: a
- type(psb_desc_type) :: desc_a
- type(psb_d_vect_type) :: xxv, bv
- type(psb_d_csr_sparse_mat) :: acsr
- character(len=5) :: afmt='csr'
- real(psb_dpk_), allocatable :: vv(:), ydev(:), yhost(:)
- type(psb_ctxt_type) :: ctxt
- integer(psb_ipk_) :: iam, np, nth, idim
- integer(psb_epk_) :: neq
-
- call psb_init(ctxt)
- call psb_info(ctxt, iam, np)
-
- write(*,*) 'Enter size :'
- read(*,*) idim
- idim = max(1, idim)
-
- n = idim**3
- call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info)
- call a%cp_to(acsr)
- m = acsr%get_nrows()
- n = acsr%get_ncols()
- nz = acsr%get_nzeros()
- call aacsr%all(m, n, nz, info)
- aacsr%val = (acsr%val)
- aacsr%ja = (acsr%ja)
- aacsr%irp = (acsr%irp)
- call aacsr%set_host()
- call aacsr%sync()
-
- call initialize(n)
-
- call to_host()
- t2 = etime()
- do i = 1, ntests
- dot_host = h_dot(n)
- end do
- t3 = etime()
-
- call tx%all(n, info)
- call ty%all(n, info)
- vv = bv%get_vect()
- call bv%set_vect(v1)
- call tx%set_vect(v1)
- call ty%set_vect(v2)
- t0 = etime()
- do i = 1, ntests * ngpu
- dot_dev = tx%dot_v(n, ty)
- end do
- !$acc wait
- t1 = etime()
- write(*,*) ' Dot Results : dev:', dot_dev, ' host:', dot_host
- write(*,*) ' Timing : dev:', t1 - t0, (t1 - t0) / (ntests * ngpu), &
- ' host:', t3 - t2, (t3 - t2) / ntests
-
- call a%mv_from(acsr)
- t2 = etime()
- do i = 1, ntests
- call a%spmm(done, bv, dzero, xxv, info)
- end do
- t3 = etime()
- yhost = xxv%get_vect()
- t0 = etime()
- do i = 1, ntests * ngpu
- call aacsr%vect_mv(done, tx, dzero, ty, info)
- end do
- !$acc wait
- t1 = etime()
- ydev = ty%get_vect()
- write(*,*) 'Correctness check: ', maxval(abs(ydev(:) - yhost(:)))
- write(*,*) ' CSR PROD '
- write(*, '(2(a,f12.3,2x))') ' Timing (ms): '
- csflp = 2.d0 * nz / ((t1 - t0) / (ntests * ngpu))
- write(*, '(2(a,f12.3,2x))') ' dev:', 1e3 * (t1 - t0) / (ntests * ngpu), ' :', csflp / 1.d6
- csflp = 2.d0 * nz / ((t3 - t2) / (ntests))
- write(*, '(2(a,f12.3,2x))') ' host:', 1e3 * (t3 - t2) / ntests, ' :', csflp / 1.d6
- write(*,*) 'Done'
-
- call tx%free(info)
- call ty%free(info)
- call finalize_dev()
- call finalize_host()
- call psb_exit(ctxt)
-end program test
diff --git a/test/pargen/Makefile b/test/pargen/Makefile
index 20a95c0b..9b720ac2 100644
--- a/test/pargen/Makefile
+++ b/test/pargen/Makefile
@@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(INSTALLDIR)/lib
-PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
+PSBLAS_LIB= -L$(LIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
@@ -25,6 +25,10 @@ psb_d_pde3d: psb_d_pde3d.o
$(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_pde3d $(EXEDIR)
+psb_d_oacc_pde3d:
+ mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o
+ $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS)
+ /bin/mv psb_d_oacc_pde3d $(EXEDIR)
psb_s_pde3d: psb_s_pde3d.o
$(FLINK) psb_s_pde3d.o -o psb_s_pde3d $(PSBLAS_LIB) $(LDLIBS)
@@ -41,7 +45,7 @@ psb_s_pde2d: psb_s_pde2d.o
clean:
- /bin/rm -f psb_d_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \
+ /bin/rm -f psb_d_pde3d.o psb_d_oacc_pde3d.o psb_s_pde3d.o psb_d_pde2d.o psb_s_pde2d.o *$(.mod) \
$(EXEDIR)/psb_d_pde3d $(EXEDIR)/psb_s_pde3d $(EXEDIR)/psb_d_pde2d $(EXEDIR)/psb_s_pde2d
verycleanlib:
(cd ../..; make veryclean)
diff --git a/test/pargen/psb_d_oacc_pde3d.F90 b/test/pargen/psb_d_oacc_pde3d.F90
new file mode 100644
index 00000000..ac992884
--- /dev/null
+++ b/test/pargen/psb_d_oacc_pde3d.F90
@@ -0,0 +1,1073 @@
+!
+! Parallel Sparse BLAS version 3.5
+! (C) Copyright 2006-2018
+! Salvatore Filippone
+! Alfredo Buttari
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+! File: psb_d_pde3d.f90
+!
+! Program: psb_d_pde3d
+! This sample program solves a linear system obtained by discretizing a
+! PDE with Dirichlet BCs.
+!
+!
+! The PDE is a general second order equation in 3d
+!
+! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u)
+! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f
+! dxdx dydy dzdz dx dy dz
+!
+! with Dirichlet boundary conditions
+! u = g
+!
+! on the unit cube 0<=x,y,z<=1.
+!
+!
+! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+!
+! There are three choices available for data distribution:
+! 1. A simple BLOCK distribution
+! 2. A ditribution based on arbitrary assignment of indices to processes,
+! typically from a graph partitioner
+! 3. A 3D distribution in which the unit cube is partitioned
+! into subcubes, each one assigned to a process.
+!
+!
+module psb_d_pde3d_mod
+
+
+ use psb_base_mod, only : psb_dpk_, psb_ipk_, psb_lpk_, psb_desc_type,&
+ & psb_dspmat_type, psb_d_vect_type, dzero,&
+ & psb_d_base_sparse_mat, psb_d_base_vect_type, &
+ & psb_i_base_vect_type, psb_l_base_vect_type
+
+ interface
+ function d_func_3d(x,y,z) result(val)
+ import :: psb_dpk_
+ real(psb_dpk_), intent(in) :: x,y,z
+ real(psb_dpk_) :: val
+ end function d_func_3d
+ end interface
+
+ interface psb_gen_pde3d
+ module procedure psb_d_gen_pde3d
+ end interface psb_gen_pde3d
+
+contains
+
+ function d_null_func_3d(x,y,z) result(val)
+
+ real(psb_dpk_), intent(in) :: x,y,z
+ real(psb_dpk_) :: val
+
+ val = dzero
+
+ end function d_null_func_3d
+ !
+ ! functions parametrizing the differential equation
+ !
+
+ !
+ ! Note: b1, b2 and b3 are the coefficients of the first
+ ! derivative of the unknown function. The default
+ ! we apply here is to have them zero, so that the resulting
+ ! matrix is symmetric/hermitian and suitable for
+ ! testing with CG and FCG.
+ ! When testing methods for non-hermitian matrices you can
+ ! change the B1/B2/B3 functions to e.g. done/sqrt((3*done))
+ !
+ function b1(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: b1
+ real(psb_dpk_), intent(in) :: x,y,z
+ b1=dzero
+ end function b1
+ function b2(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: b2
+ real(psb_dpk_), intent(in) :: x,y,z
+ b2=dzero
+ end function b2
+ function b3(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: b3
+ real(psb_dpk_), intent(in) :: x,y,z
+ b3=dzero
+ end function b3
+ function c(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: c
+ real(psb_dpk_), intent(in) :: x,y,z
+ c=dzero
+ end function c
+ function a1(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: a1
+ real(psb_dpk_), intent(in) :: x,y,z
+ a1=done/80
+ end function a1
+ function a2(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: a2
+ real(psb_dpk_), intent(in) :: x,y,z
+ a2=done/80
+ end function a2
+ function a3(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: a3
+ real(psb_dpk_), intent(in) :: x,y,z
+ a3=done/80
+ end function a3
+ function g(x,y,z)
+ use psb_base_mod, only : psb_dpk_, done, dzero
+ implicit none
+ real(psb_dpk_) :: g
+ real(psb_dpk_), intent(in) :: x,y,z
+ g = dzero
+ if (x == done) then
+ g = done
+ else if (x == dzero) then
+ g = exp(y**2-z**2)
+ end if
+ end function g
+
+
+ !
+ ! subroutine to allocate and fill in the coefficient matrix and
+ ! the rhs.
+ !
+ subroutine psb_d_gen_pde3d(ctxt,idim,a,bv,xv,desc_a,afmt,info,&
+ & f,amold,vmold,imold,partition,nrl,iv)
+ use psb_base_mod
+ use psb_util_mod
+#if defined(OPENMP)
+ use omp_lib
+#endif
+ !
+ ! Discretizes the partial differential equation
+ !
+ ! a1 dd(u) a2 dd(u) a3 dd(u) b1 d(u) b2 d(u) b3 d(u)
+ ! - ------ - ------ - ------ + ----- + ------ + ------ + c u = f
+ ! dxdx dydy dzdz dx dy dz
+ !
+ ! with Dirichlet boundary conditions
+ ! u = g
+ !
+ ! on the unit cube 0<=x,y,z<=1.
+ !
+ !
+ ! Note that if b1=b2=b3=c=0., the PDE is the Laplace equation.
+ !
+ implicit none
+ integer(psb_ipk_) :: idim
+ type(psb_dspmat_type) :: a
+ type(psb_d_vect_type) :: xv,bv
+ type(psb_desc_type) :: desc_a
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: info
+ character(len=*) :: afmt
+ procedure(d_func_3d), optional :: f
+ class(psb_d_base_sparse_mat), optional :: amold
+ class(psb_d_base_vect_type), optional :: vmold
+ class(psb_i_base_vect_type), optional :: imold
+ integer(psb_ipk_), optional :: partition, nrl,iv(:)
+
+ ! Local variables.
+
+ integer(psb_ipk_), parameter :: nb=20
+ type(psb_d_csc_sparse_mat) :: acsc
+ type(psb_d_coo_sparse_mat) :: acoo
+ type(psb_d_csr_sparse_mat) :: acsr
+ real(psb_dpk_) :: zt(nb),x,y,z
+ integer(psb_ipk_) :: nnz,nr,nlr,i,j,ii,ib,k, partition_, mysz
+ integer(psb_lpk_) :: m,n,glob_row,nt
+ integer(psb_ipk_) :: ix,iy,iz,ia,indx_owner
+ ! For 3D partition
+ ! Note: integer control variables going directly into an MPI call
+ ! must be 4 bytes, i.e. psb_mpk_
+ integer(psb_mpk_) :: npdims(3), npp, minfo
+ integer(psb_ipk_) :: npx,npy,npz, iamx,iamy,iamz,mynx,myny,mynz
+ integer(psb_ipk_), allocatable :: bndx(:),bndy(:),bndz(:)
+ ! Process grid
+ integer(psb_ipk_) :: np, iam
+ integer(psb_ipk_) :: icoeff
+ integer(psb_lpk_), allocatable :: myidx(:)
+ ! deltah dimension of each grid cell
+ ! deltat discretization time
+ real(psb_dpk_) :: deltah, sqdeltah, deltah2
+ real(psb_dpk_), parameter :: rhs=dzero,one=done,zero=dzero
+ real(psb_dpk_) :: t0, t1, t2, t3, tasb, talc, ttot, tgen, tcdasb
+ integer(psb_ipk_) :: err_act
+ procedure(d_func_3d), pointer :: f_
+ character(len=20) :: name, ch_err,tmpfmt
+
+ info = psb_success_
+ name = 'create_matrix'
+ call psb_erractionsave(err_act)
+
+ call psb_info(ctxt, iam, np)
+
+
+ if (present(f)) then
+ f_ => f
+ else
+ f_ => d_null_func_3d
+ end if
+
+ deltah = done/(idim+1)
+ sqdeltah = deltah*deltah
+ deltah2 = (2*done)* deltah
+
+ if (present(partition)) then
+ if ((1<= partition).and.(partition <= 3)) then
+ partition_ = partition
+ else
+ write(*,*) 'Invalid partition choice ',partition,' defaulting to 3'
+ partition_ = 3
+ end if
+ else
+ partition_ = 3
+ end if
+
+ ! initialize array descriptor and sparse matrix storage. provide an
+ ! estimate of the number of non zeroes
+
+ m = (1_psb_lpk_*idim)*idim*idim
+ n = m
+ nnz = ((n*7)/(np))
+ if(iam == psb_root_) write(psb_out_unit,'("Generating Matrix (size=",i0,")...")')n
+ t0 = psb_wtime()
+ select case(partition_)
+ case(1)
+ ! A BLOCK partition
+ if (present(nrl)) then
+ nr = nrl
+ else
+ !
+ ! Using a simple BLOCK distribution.
+ !
+ nt = (m+np-1)/np
+ nr = max(0,min(nt,m-(iam*nt)))
+ end if
+
+ nt = nr
+ call psb_sum(ctxt,nt)
+ if (nt /= m) then
+ write(psb_err_unit,*) iam, 'Initialization error ',nr,nt,m
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end if
+
+ !
+ ! First example of use of CDALL: specify for each process a number of
+ ! contiguous rows
+ !
+ call psb_cdall(ctxt,desc_a,info,nl=nr)
+ myidx = desc_a%get_global_indices()
+ nlr = size(myidx)
+
+ case(2)
+ ! A partition defined by the user through IV
+
+ if (present(iv)) then
+ if (size(iv) /= m) then
+ write(psb_err_unit,*) iam, 'Initialization error: wrong IV size',size(iv),m
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end if
+ else
+ write(psb_err_unit,*) iam, 'Initialization error: IV not present'
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end if
+
+ !
+ ! Second example of use of CDALL: specify for each row the
+ ! process that owns it
+ !
+ call psb_cdall(ctxt,desc_a,info,vg=iv)
+ myidx = desc_a%get_global_indices()
+ nlr = size(myidx)
+
+ case(3)
+ ! A 3-dimensional partition
+
+ ! A nifty MPI function will split the process list
+ npdims = 0
+#if defined(SERIAL_MPI)
+ npdims = 1
+#else
+ call mpi_dims_create(np,3,npdims,info)
+#endif
+ npx = npdims(1)
+ npy = npdims(2)
+ npz = npdims(3)
+
+ allocate(bndx(0:npx),bndy(0:npy),bndz(0:npz))
+ ! We can reuse idx2ijk for process indices as well.
+ call idx2ijk(iamx,iamy,iamz,iam,npx,npy,npz,base=0)
+ ! Now let's split the 3D cube in hexahedra
+ call dist1Didx(bndx,idim,npx)
+ mynx = bndx(iamx+1)-bndx(iamx)
+ call dist1Didx(bndy,idim,npy)
+ myny = bndy(iamy+1)-bndy(iamy)
+ call dist1Didx(bndz,idim,npz)
+ mynz = bndz(iamz+1)-bndz(iamz)
+
+ ! How many indices do I own?
+ nlr = mynx*myny*mynz
+ allocate(myidx(nlr))
+ ! Now, let's generate the list of indices I own
+ nr = 0
+ do i=bndx(iamx),bndx(iamx+1)-1
+ do j=bndy(iamy),bndy(iamy+1)-1
+ do k=bndz(iamz),bndz(iamz+1)-1
+ nr = nr + 1
+ call ijk2idx(myidx(nr),i,j,k,idim,idim,idim)
+ end do
+ end do
+ end do
+ if (nr /= nlr) then
+ write(psb_err_unit,*) iam,iamx,iamy,iamz, 'Initialization error: NR vs NLR ',&
+ & nr,nlr,mynx,myny,mynz
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ end if
+
+ !
+ ! Third example of use of CDALL: specify for each process
+ ! the set of global indices it owns.
+ !
+ call psb_cdall(ctxt,desc_a,info,vl=myidx)
+
+ !
+ ! Specify process topology
+ !
+ block
+ !
+ ! Use adjcncy methods
+ !
+ integer(psb_mpk_), allocatable :: neighbours(:)
+ integer(psb_mpk_) :: cnt
+ logical, parameter :: debug_adj=.true.
+ if (debug_adj.and.(np > 1)) then
+ cnt = 0
+ allocate(neighbours(np))
+ if (iamx < npx-1) then
+ cnt = cnt + 1
+ call ijk2idx(neighbours(cnt),iamx+1,iamy,iamz,npx,npy,npz,base=0)
+ end if
+ if (iamy < npy-1) then
+ cnt = cnt + 1
+ call ijk2idx(neighbours(cnt),iamx,iamy+1,iamz,npx,npy,npz,base=0)
+ end if
+ if (iamz < npz-1) then
+ cnt = cnt + 1
+ call ijk2idx(neighbours(cnt),iamx,iamy,iamz+1,npx,npy,npz,base=0)
+ end if
+ if (iamx >0) then
+ cnt = cnt + 1
+ call ijk2idx(neighbours(cnt),iamx-1,iamy,iamz,npx,npy,npz,base=0)
+ end if
+ if (iamy >0) then
+ cnt = cnt + 1
+ call ijk2idx(neighbours(cnt),iamx,iamy-1,iamz,npx,npy,npz,base=0)
+ end if
+ if (iamz >0) then
+ cnt = cnt + 1
+ call ijk2idx(neighbours(cnt),iamx,iamy,iamz-1,npx,npy,npz,base=0)
+ end if
+ call psb_realloc(cnt, neighbours,info)
+ call desc_a%set_p_adjcncy(neighbours)
+ !write(0,*) iam,' Check on neighbours: ',desc_a%get_p_adjcncy()
+ end if
+ end block
+
+ case default
+ write(psb_err_unit,*) iam, 'Initialization error: should not get here'
+ info = -1
+ call psb_barrier(ctxt)
+ call psb_abort(ctxt)
+ return
+ end select
+
+
+ if (info == psb_success_) call psb_spall(a,desc_a,info,nnz=nnz, &
+ & bldmode=psb_matbld_remote_,dupl=psb_dupl_add_)
+ ! define rhs from boundary conditions; also build initial guess
+ if (info == psb_success_) call psb_geall(xv,desc_a,info)
+ if (info == psb_success_) call psb_geall(bv,desc_a,info,&
+ & bldmode=psb_matbld_remote_,dupl=psb_dupl_add_)
+
+ call psb_barrier(ctxt)
+ talc = psb_wtime()-t0
+
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='allocation rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ !$omp parallel shared(deltah,myidx,a,desc_a)
+ !
+ block
+ integer(psb_ipk_) :: i,j,k,ii,ib,icoeff, ix,iy,iz, ith,nth
+ integer(psb_lpk_) :: glob_row
+ integer(psb_lpk_), allocatable :: irow(:),icol(:)
+ real(psb_dpk_), allocatable :: val(:)
+ real(psb_dpk_) :: x,y,z, zt(nb)
+#if defined(OPENMP)
+ nth = omp_get_num_threads()
+ ith = omp_get_thread_num()
+#else
+ nth = 1
+ ith = 0
+#endif
+ allocate(val(20*nb),irow(20*nb),&
+ &icol(20*nb),stat=info)
+ if (info /= psb_success_ ) then
+ info=psb_err_alloc_dealloc_
+ call psb_errpush(info,name)
+ !goto 9999
+ endif
+
+ !$omp do schedule(dynamic)
+ !
+ do ii=1, nlr, nb
+ if(info /= psb_success_) cycle
+ ib = min(nb,nlr-ii+1)
+ !ib = min(nb,mysz-ii+1)
+ icoeff = 1
+ do k=1,ib
+ i=ii+k-1
+ ! local matrix pointer
+ glob_row=myidx(i)
+ ! compute gridpoint coordinates
+ call idx2ijk(ix,iy,iz,glob_row,idim,idim,idim)
+ ! x, y, z coordinates
+ x = (ix-1)*deltah
+ y = (iy-1)*deltah
+ z = (iz-1)*deltah
+ zt(k) = f_(x,y,z)
+ ! internal point: build discretization
+ !
+ ! term depending on (x-1,y,z)
+ !
+ val(icoeff) = -a1(x,y,z)/sqdeltah-b1(x,y,z)/deltah2
+ if (ix == 1) then
+ zt(k) = g(dzero,y,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix-1,iy,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x,y-1,z)
+ val(icoeff) = -a2(x,y,z)/sqdeltah-b2(x,y,z)/deltah2
+ if (iy == 1) then
+ zt(k) = g(x,dzero,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy-1,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x,y,z-1)
+ val(icoeff)=-a3(x,y,z)/sqdeltah-b3(x,y,z)/deltah2
+ if (iz == 1) then
+ zt(k) = g(x,y,dzero)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy,iz-1,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+
+ ! term depending on (x,y,z)
+ val(icoeff)=(2*done)*(a1(x,y,z)+a2(x,y,z)+a3(x,y,z))/sqdeltah &
+ & + c(x,y,z)
+ call ijk2idx(icol(icoeff),ix,iy,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ ! term depending on (x,y,z+1)
+ val(icoeff)=-a3(x,y,z)/sqdeltah+b3(x,y,z)/deltah2
+ if (iz == idim) then
+ zt(k) = g(x,y,done)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy,iz+1,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x,y+1,z)
+ val(icoeff)=-a2(x,y,z)/sqdeltah+b2(x,y,z)/deltah2
+ if (iy == idim) then
+ zt(k) = g(x,done,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix,iy+1,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+ ! term depending on (x+1,y,z)
+ val(icoeff)=-a1(x,y,z)/sqdeltah+b1(x,y,z)/deltah2
+ if (ix==idim) then
+ zt(k) = g(done,y,z)*(-val(icoeff)) + zt(k)
+ else
+ call ijk2idx(icol(icoeff),ix+1,iy,iz,idim,idim,idim)
+ irow(icoeff) = glob_row
+ icoeff = icoeff+1
+ endif
+
+ end do
+#if defined(OPENMP)
+!!$ write(0,*) omp_get_thread_num(),' Check insertion ',&
+!!$ & irow(1:icoeff-1),':',icol(1:icoeff-1)
+#endif
+ call psb_spins(icoeff-1,irow,icol,val,a,desc_a,info)
+ if(info /= psb_success_) cycle
+ call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),bv,desc_a,info)
+ if(info /= psb_success_) cycle
+ zt(:)=dzero
+ call psb_geins(ib,myidx(ii:ii+ib-1),zt(1:ib),xv,desc_a,info)
+ if(info /= psb_success_) cycle
+ end do
+ !$omp end do
+ deallocate(val,irow,icol)
+ end block
+ !$omp end parallel
+
+ tgen = psb_wtime()-t1
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='insert rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+
+
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ call psb_cdasb(desc_a,info,mold=imold)
+ tcdasb = psb_wtime()-t1
+
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ if (info == psb_success_) then
+ if (present(amold)) then
+ call psb_spasb(a,desc_a,info,mold=amold)
+ else
+ call psb_spasb(a,desc_a,info,afmt=afmt)
+ end if
+ end if
+ call psb_barrier(ctxt)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='asb rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+ if (info == psb_success_) call psb_geasb(xv,desc_a,info,mold=vmold)
+ if (info == psb_success_) call psb_geasb(bv,desc_a,info,mold=vmold)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ ch_err='asb rout.'
+ call psb_errpush(info,name,a_err=ch_err)
+ goto 9999
+ end if
+ tasb = psb_wtime()-t1
+ call psb_barrier(ctxt)
+ ttot = psb_wtime() - t0
+
+ call psb_amx(ctxt,talc)
+ call psb_amx(ctxt,tgen)
+ call psb_amx(ctxt,tasb)
+ call psb_amx(ctxt,ttot)
+ if(iam == psb_root_) then
+ tmpfmt = a%get_fmt()
+ write(psb_out_unit,'("The matrix has been generated and assembled in ",a3," format.")')&
+ & tmpfmt
+ write(psb_out_unit,'("-allocation time : ",es12.5)') talc
+ write(psb_out_unit,'("-coeff. gen. time : ",es12.5)') tgen
+ write(psb_out_unit,'("-desc asbly time : ",es12.5)') tcdasb
+ write(psb_out_unit,'("- mat asbly time : ",es12.5)') tasb
+ write(psb_out_unit,'("-total time : ",es12.5)') ttot
+
+ end if
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+ return
+ end subroutine psb_d_gen_pde3d
+ function outside(i,j,k,bndx,bndy,bndz,iamx,iamy,iamz) result(res)
+ logical :: res
+ integer(psb_ipk_), intent(in) :: i,j,k,iamx,iamy,iamz
+ integer(psb_ipk_), intent(in) :: bndx(0:),bndy(0:),bndz(0:)
+
+ res = (i=bndx(iamx+1)) &
+ & .or.(j=bndy(iamy+1)) &
+ & .or.(k=bndz(iamz+1))
+ end function outside
+end module psb_d_pde3d_mod
+
+program psb_d_oacc_pde3d
+ use psb_base_mod
+ use psb_prec_mod
+ use psb_krylov_mod
+ use psb_util_mod
+ use psb_d_pde3d_mod
+#if defined(OPENACC)
+ use psb_oacc_mod
+#endif
+ implicit none
+
+ ! input parameters
+ character(len=20) :: kmethd, ptype
+ character(len=5) :: afmt, agfmt
+ integer(psb_ipk_) :: idim
+ integer(psb_epk_) :: system_size
+
+ ! miscellaneous
+ real(psb_dpk_), parameter :: one = done
+ real(psb_dpk_) :: t1, t2, tprec
+
+ ! sparse matrix and preconditioner
+ type(psb_dspmat_type) :: a, agpu
+ type(psb_dprec_type) :: prec
+ ! descriptor
+ type(psb_desc_type) :: desc_a
+ ! dense vectors
+ type(psb_d_vect_type), target :: xxv, bv, xg, bg
+#ifdef OPENACC
+ type(psb_d_vect_oacc) :: vmold
+ type(psb_i_vect_oacc) :: imold
+ type(psb_d_oacc_csr_sparse_mat) :: acsrg
+#endif
+ real(psb_dpk_), allocatable :: x0(:)
+ ! parallel environment
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: iam, np, nth
+
+ ! solver parameters
+ integer(psb_ipk_) :: iter, itmax, itrace, istopc, irst, ipart
+ integer(psb_epk_) :: amatsize, precsize, descsize, d2size
+ real(psb_dpk_) :: err, eps
+
+ ! Parameters for solvers in Block-Jacobi preconditioner
+ type ainvparms
+ character(len=12) :: alg, orth_alg, ilu_alg, ilut_scale
+ integer(psb_ipk_) :: fill, inv_fill
+ real(psb_dpk_) :: thresh, inv_thresh
+ end type ainvparms
+ type(ainvparms) :: parms
+
+ ! other variables
+ integer(psb_ipk_) :: info, i
+ character(len=20) :: name, ch_err
+ character(len=40) :: fname
+
+ info = psb_success_
+
+ call psb_init(ctxt)
+ call psb_info(ctxt, iam, np)
+
+#if defined(OPENACC)
+ call psb_oacc_init(ctxt)
+#endif
+
+ nth = 1
+
+ if (iam < 0) then
+ ! This should not happen, but just in case
+ call psb_exit(ctxt)
+ stop
+ endif
+ if (psb_errstatus_fatal()) goto 9999
+ name = 'pde3d90_oacc'
+ call psb_set_errverbosity(itwo)
+
+ ! Hello world
+ if (iam == psb_root_) then
+ write(*,*) 'Welcome to PSBLAS version: ', psb_version_string_
+ write(*,*) 'This is the ', trim(name), ' sample program'
+ end if
+
+ ! get parameters
+ call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms)
+
+ ! allocate and fill in the coefficient matrix, rhs and initial guess
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ call psb_gen_pde3d(ctxt, idim, a, bv, xxv, desc_a, afmt, info, partition = ipart)
+ call psb_barrier(ctxt)
+ t2 = psb_wtime() - t1
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ ch_err = 'psb_gen_pde3d'
+ call psb_errpush(info, name, a_err = ch_err)
+ goto 9999
+ end if
+ if (iam == psb_root_) write(psb_out_unit, '("Overall matrix creation time : ", es12.5)') t2
+ if (iam == psb_root_) write(psb_out_unit, '(" ")')
+
+#ifdef OPENACC
+ ! Convert matrix to GPU format
+ call a%cscnv(agpu, info, mold = acsrg)
+ if ((info /= 0) .or. (psb_get_errstatus() /= 0)) then
+ write(0,*) 'From cscnv ', info
+ call psb_error()
+ stop
+ end if
+ call desc_a%cnv(mold = imold)
+ call psb_geasb(bg, desc_a, info, scratch = .true., mold = vmold)
+ call psb_geasb(xg, desc_a, info, scratch = .true., mold = vmold)
+#endif
+
+ ! prepare the preconditioner.
+ if (iam == psb_root_) write(psb_out_unit, '("Setting preconditioner to : ", a)') ptype
+ call prec%init(ctxt, ptype, info)
+
+ ! Set the options for the BJAC preconditioner
+ if (psb_toupper(ptype) == "BJAC") then
+ call prec%set('sub_solve', parms%alg, info)
+ select case (psb_toupper(parms%alg))
+ case ("ILU")
+ call prec%set('sub_fillin', parms%fill, info)
+ call prec%set('ilu_alg', parms%ilu_alg, info)
+ case ("ILUT")
+ call prec%set('sub_fillin', parms%fill, info)
+ call prec%set('sub_iluthrs', parms%thresh, info)
+ call prec%set('ilut_scale', parms%ilut_scale, info)
+ case ("AINV")
+ call prec%set('inv_thresh', parms%inv_thresh, info)
+ call prec%set('inv_fillin', parms%inv_fill, info)
+ call prec%set('ilut_scale', parms%ilut_scale, info)
+ call prec%set('ainv_alg', parms%orth_alg, info)
+ case ("INVK")
+ call prec%set('sub_fillin', parms%fill, info)
+ call prec%set('inv_fillin', parms%inv_fill, info)
+ call prec%set('ilut_scale', parms%ilut_scale, info)
+ case ("INVT")
+ call prec%set('sub_fillin', parms%fill, info)
+ call prec%set('inv_fillin', parms%inv_fill, info)
+ call prec%set('sub_iluthrs', parms%thresh, info)
+ call prec%set('inv_thresh', parms%inv_thresh, info)
+ call prec%set('ilut_scale', parms%ilut_scale, info)
+ case default
+ ! Do nothing, use default setting in the init routine
+ end select
+ else
+ ! nothing to set for NONE or DIAG preconditioner
+ end if
+
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ call prec%build(a, desc_a, info)
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ ch_err = 'psb_precbld'
+ call psb_errpush(info, name, a_err = ch_err)
+ goto 9999
+ end if
+
+ tprec = psb_wtime() - t1
+
+ call psb_amx(ctxt, tprec)
+
+ if (iam == psb_root_) write(psb_out_unit, '("Preconditioner time : ", es12.5)') tprec
+ if (iam == psb_root_) write(psb_out_unit, '(" ")')
+ call prec%descr(info)
+
+ ! iterative method parameters
+ if (iam == psb_root_) write(psb_out_unit, '("Calling iterative method ", a)') kmethd
+ call psb_barrier(ctxt)
+ t1 = psb_wtime()
+ eps = 1.d-6
+
+#ifdef OPENACC
+ call psb_krylov(kmethd, agpu, prec, bv, xxv, eps, desc_a, info, &
+ itmax = itmax, iter = iter, err = err, itrace = itrace, istop = istopc, irst = irst)
+#else
+ call psb_krylov(kmethd, a, prec, bv, xxv, eps, desc_a, info, &
+ itmax = itmax, iter = iter, err = err, itrace = itrace, istop = istopc, irst = irst)
+#endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ ch_err = 'solver routine'
+ call psb_errpush(info, name, a_err = ch_err)
+ goto 9999
+ end if
+
+ call psb_barrier(ctxt)
+ t2 = psb_wtime() - t1
+ call psb_amx(ctxt, t2)
+ amatsize = a%sizeof()
+ descsize = desc_a%sizeof()
+ precsize = prec%sizeof()
+ system_size = desc_a%get_global_rows()
+ call psb_sum(ctxt, amatsize)
+ call psb_sum(ctxt, descsize)
+ call psb_sum(ctxt, precsize)
+
+ if (iam == psb_root_) then
+ write(psb_out_unit, '(" ")')
+ write(psb_out_unit, '("Number of processes : ", i12)') np
+ write(psb_out_unit, '("Number of threads : ", i12)') nth
+ write(psb_out_unit, '("Total number of tasks : ", i12)') nth * np
+ write(psb_out_unit, '("Linear system size : ", i12)') system_size
+ write(psb_out_unit, '("Time to solve system : ", es12.5)') t2
+ write(psb_out_unit, '("Time per iteration : ", es12.5)') t2 / iter
+ write(psb_out_unit, '("Number of iterations : ", i12)') iter
+ write(psb_out_unit, '("Convergence indicator on exit : ", es12.5)') err
+ write(psb_out_unit, '("Info on exit : ", i12)') info
+ write(psb_out_unit, '("Total memory occupation for A: ", i12)') amatsize
+ write(psb_out_unit, '("Total memory occupation for PREC: ", i12)') precsize
+ write(psb_out_unit, '("Total memory occupation for DESC_A: ", i12)') descsize
+ write(psb_out_unit, '("Storage format for A: ", a)') a%get_fmt()
+ write(psb_out_unit, '("Storage format for DESC_A: ", a)') desc_a%get_fmt()
+ end if
+
+ ! cleanup storage and exit
+ call psb_gefree(bv, desc_a, info)
+ call psb_gefree(xxv, desc_a, info)
+ call psb_spfree(a, desc_a, info)
+ call prec%free(info)
+ call psb_cdfree(desc_a, info)
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ ch_err = 'free routine'
+ call psb_errpush(info, name, a_err = ch_err)
+ goto 9999
+ end if
+
+#ifdef OPENACC
+ call psb_oacc_exit()
+#endif
+ call psb_exit(ctxt)
+ stop
+
+9999 call psb_error(ctxt)
+
+ stop
+
+contains
+ ! get iteration parameters from standard input
+ subroutine get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms)
+ type(psb_ctxt_type) :: ctxt
+ character(len = *) :: kmethd, ptype, afmt, agfmt
+ integer(psb_ipk_) :: idim, istopc, itmax, itrace, irst, ipart
+ integer(psb_ipk_) :: np, iam
+ integer(psb_ipk_) :: ip, inp_unit
+ character(len = 1024) :: filename
+ type(ainvparms) :: parms
+
+ call psb_info(ctxt, iam, np)
+
+ if (iam == 0) then
+ if (command_argument_count() > 0) then
+ call get_command_argument(1, filename)
+ inp_unit = 30
+ open(inp_unit, file = filename, action = 'read', iostat = info)
+ if (info /= 0) then
+ write(psb_err_unit, *) 'Could not open file ', filename, ' for input'
+ call psb_abort(ctxt)
+ stop
+ else
+ write(psb_err_unit, *) 'Opened file ', trim(filename), ' for input'
+ end if
+ else
+ inp_unit = psb_inp_unit
+ end if
+ read(inp_unit, *) ip
+ if (ip >= 3) then
+ read(inp_unit, *) kmethd
+ read(inp_unit, *) ptype
+ read(inp_unit, *) afmt
+ read(inp_unit, *) agfmt
+ read(inp_unit, *) idim
+ if (ip >= 4) then
+ read(inp_unit, *) ipart
+ else
+ ipart = 3
+ endif
+ if (ip >= 5) then
+ read(inp_unit, *) istopc
+ else
+ istopc = 1
+ endif
+ if (ip >= 6) then
+ read(inp_unit, *) itmax
+ else
+ itmax = 500
+ endif
+ if (ip >= 7) then
+ read(inp_unit, *) itrace
+ else
+ itrace = -1
+ endif
+ if (ip >= 8) then
+ read(inp_unit, *) irst
+ else
+ irst = 1
+ endif
+ if (ip >= 9) then
+ read(inp_unit, *) parms%alg
+ read(inp_unit, *) parms%ilu_alg
+ read(inp_unit, *) parms%ilut_scale
+ read(inp_unit, *) parms%fill
+ read(inp_unit, *) parms%inv_fill
+ read(inp_unit, *) parms%thresh
+ read(inp_unit, *) parms%inv_thresh
+ read(inp_unit, *) parms%orth_alg
+ else
+ parms%alg = 'ILU' ! Block Solver ILU, ILUT, INVK, AINVT, AORTH
+ parms%ilu_alg = 'NONE' ! If ILU : MILU or NONE otherwise ignored
+ parms%ilut_scale = 'NONE' ! If ILUT: NONE, MAXVAL, DIAG, ARWSUM, ACLSUM, ARCSUM
+ parms%fill = 0 ! Level of fill for forward factorization
+ parms%inv_fill = 1 ! Level of fill for inverse factorization (only INVK)
+ parms%thresh = 1E-1_psb_dpk_ ! Threshold for forward factorization
+ parms%inv_thresh = 1E-1_psb_dpk_ ! Threshold for inverse factorization
+ parms%orth_alg = 'LLK' ! What orthogonalization algorithm?
+ endif
+
+ write(psb_out_unit, '("Solving matrix : ell1")')
+ write(psb_out_unit, &
+ '("Grid dimensions : ", i4, " x ", i4, " x ", i4)') &
+ idim, idim, idim
+ write(psb_out_unit, '("Number of processors : ", i0)') np
+ select case (ipart)
+ case (1)
+ write(psb_out_unit, '("Data distribution : BLOCK")')
+ case (3)
+ write(psb_out_unit, '("Data distribution : 3D")')
+ case default
+ ipart = 3
+ write(psb_out_unit, '("Unknown data distrbution, defaulting to 3D")')
+ end select
+ write(psb_out_unit, '("Preconditioner : ", a)') ptype
+ if (psb_toupper(ptype) == "BJAC") then
+ write(psb_out_unit, '("Block subsolver : ", a)') parms%alg
+ select case (psb_toupper(parms%alg))
+ case ('ILU')
+ write(psb_out_unit, '("Fill in : ", i0)') parms%fill
+ write(psb_out_unit, '("MILU : ", a)') parms%ilu_alg
+ case ('ILUT')
+ write(psb_out_unit, '("Fill in : ", i0)') parms%fill
+ write(psb_out_unit, '("Threshold : ", es12.5)') parms%thresh
+ write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale
+ case ('INVK')
+ write(psb_out_unit, '("Fill in : ", i0)') parms%fill
+ write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill
+ write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale
+ case ('INVT')
+ write(psb_out_unit, '("Fill in : ", i0)') parms%fill
+ write(psb_out_unit, '("Threshold : ", es12.5)') parms%thresh
+ write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill
+ write(psb_out_unit, '("Inverse Threshold : ", es12.5)') parms%inv_thresh
+ write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale
+ case ('AINV', 'AORTH')
+ write(psb_out_unit, '("Inverse Threshold : ", es12.5)') parms%inv_thresh
+ write(psb_out_unit, '("Invese Fill in : ", i0)') parms%inv_fill
+ write(psb_out_unit, '("Orthogonalization : ", a)') parms%orth_alg
+ write(psb_out_unit, '("Scaling : ", a)') parms%ilut_scale
+ case default
+ write(psb_out_unit, '("Unknown diagonal solver")')
+ end select
+ end if
+ write(psb_out_unit, '("Iterative method : ", a)') kmethd
+ write(psb_out_unit, '(" ")')
+ else
+ ! wrong number of parameter, print an error message and exit
+ call pr_usage(izero)
+ call psb_abort(ctxt)
+ stop 1
+ endif
+ if (inp_unit /= psb_inp_unit) then
+ close(inp_unit)
+ end if
+ end if
+ ! broadcast parameters to all processors
+ call psb_bcast(ctxt, kmethd)
+ call psb_bcast(ctxt, afmt)
+ call psb_bcast(ctxt, agfmt)
+ call psb_bcast(ctxt, ptype)
+ call psb_bcast(ctxt, idim)
+ call psb_bcast(ctxt, ipart)
+ call psb_bcast(ctxt, istopc)
+ call psb_bcast(ctxt, itmax)
+ call psb_bcast(ctxt, itrace)
+ call psb_bcast(ctxt, irst)
+ call psb_bcast(ctxt, parms%alg)
+ call psb_bcast(ctxt, parms%fill)
+ call psb_bcast(ctxt, parms%inv_fill)
+ call psb_bcast(ctxt, parms%thresh)
+ call psb_bcast(ctxt, parms%inv_thresh)
+ call psb_bcast(ctxt, parms%orth_alg)
+ call psb_bcast(ctxt, parms%ilut_scale)
+
+ return
+
+ end subroutine get_parms
+
+ ! print an error message
+ subroutine pr_usage(iout)
+ integer(psb_ipk_) :: iout
+ write(iout, *) 'incorrect parameter(s) found'
+ write(iout, *) ' usage: pde3d90 methd prec dim &'
+ write(iout, *) '[istop itmax itrace]'
+ write(iout, *) ' where:'
+ write(iout, *) ' methd: cgstab cgs rgmres bicgstabl'
+ write(iout, *) ' prec : bjac diag none'
+ write(iout, *) ' dim number of points along each axis'
+ write(iout, *) ' the size of the resulting linear '
+ write(iout, *) ' system is dim**3'
+ write(iout, *) ' ipart data partition 1 3 '
+ write(iout, *) ' istop stopping criterion 1, 2 '
+ write(iout, *) ' itmax maximum number of iterations [500] '
+ write(iout, *) ' itrace <=0 (no tracing, default) or '
+ write(iout, *) ' >= 1 do tracing every itrace'
+ write(iout, *) ' iterations '
+ end subroutine pr_usage
+
+end program psb_d_oacc_pde3d
From 08a9744413475b756341bbd9cd47e3b28aefd069 Mon Sep 17 00:00:00 2001
From: tloloum
Date: Fri, 19 Jul 2024 13:01:07 +0200
Subject: [PATCH 18/86] moving test
---
test/openacc/Makefile | 7 ++++++-
test/{pargen => openacc}/psb_d_oacc_pde3d.F90 | 0
test/pargen/Makefile | 7 +------
3 files changed, 7 insertions(+), 7 deletions(-)
rename test/{pargen => openacc}/psb_d_oacc_pde3d.F90 (100%)
diff --git a/test/openacc/Makefile b/test/openacc/Makefile
index 9d62f3cd..a2827b30 100644
--- a/test/openacc/Makefile
+++ b/test/openacc/Makefile
@@ -9,7 +9,7 @@ INCDIR=$(TOPDIR)/include
MODDIR=$(TOPDIR)/modules
EXEDIR=./runs
-PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_base -lpsb_ext -lpsb_util -lpsb_krylov -lpsb_prec -lopenblas -lmetis
+PSBLAS_LIB= -L$(LIBDIR) -L$(PSBLIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBGPULDLIBS)
FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR) $(FMFLAG)$(PSBINCDIR) $(LIBRSB_DEFINES)
@@ -39,6 +39,11 @@ dir:
%.o: %.c
$(CC) $(CFLAGS) $(FINCLUDES) -c $< -o $@
+psb_d_oacc_pde3d:
+ mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o
+ $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS)
+ /bin/mv psb_d_oacc_pde3d $(EXEDIR)
+
clean:
/bin/rm -fr *.o *.mod $(EXEDIR)/*
diff --git a/test/pargen/psb_d_oacc_pde3d.F90 b/test/openacc/psb_d_oacc_pde3d.F90
similarity index 100%
rename from test/pargen/psb_d_oacc_pde3d.F90
rename to test/openacc/psb_d_oacc_pde3d.F90
diff --git a/test/pargen/Makefile b/test/pargen/Makefile
index 9b720ac2..c0a1c375 100644
--- a/test/pargen/Makefile
+++ b/test/pargen/Makefile
@@ -5,7 +5,7 @@ include $(INCDIR)/Make.inc.psblas
#
# Libraries used
LIBDIR=$(INSTALLDIR)/lib
-PSBLAS_LIB= -L$(LIBDIR) -lpsb_openacc -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
+PSBLAS_LIB= -L$(LIBDIR) -lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base
LDLIBS=$(PSBLDLIBS)
#
# Compilers and such
@@ -25,11 +25,6 @@ psb_d_pde3d: psb_d_pde3d.o
$(FLINK) psb_d_pde3d.o -o psb_d_pde3d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_d_pde3d $(EXEDIR)
-psb_d_oacc_pde3d:
- mpifort -fallow-argument-mismatch -frecursive -g -O3 -frecursive -I../../modules/ -I. -DOPENACC -DHAVE_LAPACK -DHAVE_FLUSH_STMT -DLPK8 -DIPK4 -DMPI_MOD -c psb_d_oacc_pde3d.F90 -o psb_d_oacc_pde3d.o
- $(FLINK) -fopenacc -DOPENACC psb_d_oacc_pde3d.o -o psb_d_oacc_pde3d $(PSBLAS_LIB) $(LDLIBS)
- /bin/mv psb_d_oacc_pde3d $(EXEDIR)
-
psb_s_pde3d: psb_s_pde3d.o
$(FLINK) psb_s_pde3d.o -o psb_s_pde3d $(PSBLAS_LIB) $(LDLIBS)
/bin/mv psb_s_pde3d $(EXEDIR)
From 162b2fc78fee4bf3596d5098af69ea10e5a3f760 Mon Sep 17 00:00:00 2001
From: tloloum
Date: Mon, 22 Jul 2024 15:27:33 +0200
Subject: [PATCH 19/86] beginning of oacc_ell
---
openacc/Makefile | 4 +-
openacc/psb_d_oacc_ell_mat_mod.F90 | 218 +++++++++++++++++++++++++++++
2 files changed, 221 insertions(+), 1 deletion(-)
create mode 100644 openacc/psb_d_oacc_ell_mat_mod.F90
diff --git a/openacc/Makefile b/openacc/Makefile
index 3a752ac7..cba6b1f7 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -22,6 +22,7 @@ FOBJS= psb_i_oacc_vect_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
+ psb_d_oacc_ell_mat_mod.o \
psb_oacc_mod.o psb_oacc_env_mod.o
@@ -48,6 +49,7 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
+ psb_d_oacc_ell_mat_mod.o \
psb_oacc_env_mod.o
psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
@@ -58,7 +60,7 @@ psb_s_oacc_csr_mat_mod.o: psb_s_oacc_vect_mod.o
psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o
psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o
psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o
-
+psb_d_oacc_ell_mat_mod.o: psb_d_oacc_vect_mod.o
clean: cclean iclean
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
new file mode 100644
index 00000000..846707af
--- /dev/null
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -0,0 +1,218 @@
+module psb_d_oacc_ell_mat_mod
+ use iso_c_binding
+ use psb_d_mat_mod
+ use psb_d_ell_mat_mod
+ use psb_d_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => d_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => d_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => d_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => d_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => d_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
+ procedure, pass(a) :: sync => d_oacc_ell_sync
+ procedure, pass(a) :: free => d_oacc_ell_free
+
+ end type psb_d_oacc_ell_sparse_mat
+
+ contains
+
+ subroutine d_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+
+ call a%psb_d_ell_sparse_mat%free()
+
+ return
+ end subroutine d_oacc_ell_free
+
+
+ function d_oacc_ell_sizeof(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+
+ end function d_oacc_ell_sizeof
+
+ subroutine d_oacc_ell_sync_space(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call d_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+ end subroutine d_oacc_ell_sync_space
+
+
+
+ function d_oacc_ell_is_host(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function d_oacc_ell_is_host
+
+ function d_oacc_ell_is_sync(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function d_oacc_ell_is_sync
+
+ function d_oacc_ell_is_dev(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function d_oacc_ell_is_dev
+
+ subroutine d_oacc_ell_set_host(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine d_oacc_ell_set_host
+
+ subroutine d_oacc_ell_set_sync(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine d_oacc_ell_set_sync
+
+ subroutine d_oacc_ell_set_dev(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine d_oacc_ell_set_dev
+
+ function d_oacc_ell_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'ELL_oacc'
+ end function d_oacc_ell_get_fmt
+
+ subroutine d_oacc_ell_sync(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a
+ class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call d_oacc_ell_to_host(a%val)
+ call i_oacc_ell_to_host(a%ja)
+ call i_oacc_ell_to_host_scalar(a%irn)
+ call i_oacc_ell_to_host_scalar(a%idiag)
+ else if (a%is_host()) then
+ call d_oacc_ell_to_dev(a%val)
+ call i_oacc_ell_to_dev(a%ja)
+ call i_oacc_ell_to_dev_scalar(a%irn)
+ call i_oacc_ell_to_dev_scalar(a%idiag)
+ end if
+ call tmpa%set_sync()
+ end subroutine d_oacc_ell_sync
+
+ subroutine d_oacc_ell_to_host(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine d_oacc_ell_to_host
+
+ subroutine d_oacc_ell_to_host_scalar(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_ell_to_host_scalar
+
+ subroutine i_oacc_ell_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev
+
+ subroutine i_oacc_ell_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev_scalar
+
+ subroutine i_oacc_ell_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host
+
+ subroutine i_oacc_ell_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host_scalar
+
+
+end module psb_d_oacc_ell_mat_mod
\ No newline at end of file
From 10ec5eafabe405d9ee2ebb1c0be5a9bdca13f9c8 Mon Sep 17 00:00:00 2001
From: tloloum
Date: Fri, 26 Jul 2024 09:56:32 +0200
Subject: [PATCH 20/86] ELL oacc impl
---
openacc/impl/Makefile | 15 ++-
openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 47 +++++++
openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 78 +++++++++++
openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 24 ++++
openacc/impl/psb_d_oacc_ell_csmm.F90 | 86 ++++++++++++
openacc/impl/psb_d_oacc_ell_csmv.F90 | 82 ++++++++++++
openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 | 85 ++++++++++++
openacc/impl/psb_d_oacc_ell_mold.F90 | 34 +++++
openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 25 ++++
openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 24 ++++
openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 | 28 ++++
openacc/impl/psb_d_oacc_ell_scal.F90 | 58 ++++++++
openacc/impl/psb_d_oacc_ell_scals.F90 | 39 ++++++
openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 66 +++++++++
openacc/psb_d_oacc_ell_mat_mod.F90 | 125 ++++++++++++++++++
15 files changed, 815 insertions(+), 1 deletion(-)
create mode 100644 openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_csmm.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_csmv.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_mold.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_scal.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_scals.F90
create mode 100644 openacc/impl/psb_d_oacc_ell_vect_mv.F90
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index 32f104dd..a38c703d 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -70,7 +70,20 @@ psb_z_oacc_csr_mv_from_coo.o \
psb_z_oacc_csr_mv_from_fmt.o \
psb_z_oacc_csr_mold.o \
psb_z_oacc_mlt_v_2.o \
-psb_z_oacc_mlt_v.o
+psb_z_oacc_mlt_v.o \
+psb_d_oacc_ell_vect_mv.o \
+psb_d_oacc_ell_inner_vect_sv.o \
+psb_d_oacc_ell_csmm.o \
+psb_d_oacc_ell_csmv.o \
+psb_d_oacc_ell_scals.o \
+psb_d_oacc_ell_scal.o \
+psb_d_oacc_ell_reallocate_nz.o \
+psb_d_oacc_ell_allocate_mnnz.o \
+psb_d_oacc_ell_cp_from_coo.o \
+psb_d_oacc_ell_cp_from_fmt.o \
+psb_d_oacc_ell_mv_from_coo.o \
+psb_d_oacc_ell_mv_from_fmt.o \
+psb_d_oacc_ell_mold.o \
objs: $(OBJS)
diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
new file mode 100644
index 00000000..4923e12c
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
@@ -0,0 +1,47 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_d_ell_sparse_mat%allocate(m, n, nz_)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(m, nz_))
+ allocate(a%ja(m, nz_))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ end if
+
+ a%val = 0.0_psb_dpk_
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_ell_allocate_mnnz
+end submodule psb_d_oacc_ell_allocate_mnnz_impl
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
new file mode 100644
index 00000000..560d3451
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
@@ -0,0 +1,78 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ real(psb_dpk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 1
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(a%get_nrows(), nz_per_row))
+ allocate(a%ja(a%get_nrows(), nz_per_row))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ end if
+ a%val = 0.0_psb_dpk_
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row, row_counts(row) + 1) = value
+ a%ja(row, row_counts(row) + 1) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(i, j) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_d_oacc_ell_cp_from_coo
+end submodule psb_d_oacc_ell_cp_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
new file mode 100644
index 00000000..b62a40d5
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_d_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_d_oacc_ell_cp_from_fmt
+end submodule psb_d_oacc_ell_cp_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_ell_csmm.F90 b/openacc/impl/psb_d_oacc_ell_csmm.F90
new file mode 100644
index 00000000..6515a306
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_ell_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = 1, nzt
+ y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_ell_csmm
+end submodule psb_d_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_d_oacc_ell_csmv.F90 b/openacc/impl/psb_d_oacc_ell_csmv.F90
new file mode 100644
index 00000000..8d5e2aaa
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_csmv.F90
@@ -0,0 +1,82 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, nzt
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_ell_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nzt = a%nzt
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ do j = 1, nzt
+ y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_ell_csmv
+end submodule psb_d_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
new file mode 100644
index 00000000..2c5563ce
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
@@ -0,0 +1,85 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ real(psb_dpk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_ell_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nzt
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_d_vect_oacc)
+ select type(yy => y)
+ type is (psb_d_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nzt = a%nzt
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val, 1)
+ do j = 1, nzt
+ yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'ell_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_d_oacc_ell_inner_vect_sv
+end submodule psb_d_oacc_ell_inner_vect_sv_impl
diff --git a/openacc/impl/psb_d_oacc_ell_mold.F90 b/openacc/impl/psb_d_oacc_ell_mold.F90
new file mode 100644
index 00000000..767e7f13
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_mold(a, b, info)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'ell_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_d_oacc_ell_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_d_oacc_ell_mold
+end submodule psb_d_oacc_ell_mold_impl
diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
new file mode 100644
index 00000000..688d182b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_d_ell_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_d_oacc_ell_mv_from_coo
+end submodule psb_d_oacc_ell_mv_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
new file mode 100644
index 00000000..8bda6e6e
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_d_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_d_oacc_ell_mv_from_fmt
+end submodule psb_d_oacc_ell_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
new file mode 100644
index 00000000..11332472
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='d_oacc_ell_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_d_ell_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_ell_reallocate_nz
+end submodule psb_d_oacc_ell_reallocate_nz_impl
diff --git a/openacc/impl/psb_d_oacc_ell_scal.F90 b/openacc/impl/psb_d_oacc_ell_scal.F90
new file mode 100644
index 00000000..39948d5f
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_scal.F90
@@ -0,0 +1,58 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_scal(d, a, info, side)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, m, nzt
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(j)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_ell_scal
+end submodule psb_d_oacc_ell_scal_impl
diff --git a/openacc/impl/psb_d_oacc_ell_scals.F90 b/openacc/impl/psb_d_oacc_ell_scals.F90
new file mode 100644
index 00000000..a6292b72
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_scals.F90
@@ -0,0 +1,39 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_scals(d, a, info)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, nzt, m
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_ell_scals
+end submodule psb_d_oacc_ell_scals_impl
diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
new file mode 100644
index 00000000..ebcdb405
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
@@ -0,0 +1,66 @@
+submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nzt
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nzt = a%nzt
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n, nzt
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_) :: val(:,:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ real(psb_dpk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = 1, nzt
+ if (ja(i,j) > 0) then
+ tmp = tmp + val(i,j) * x(ja(i,j))
+ end if
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_d_oacc_ell_vect_mv
+end submodule psb_d_oacc_ell_vect_mv_impl
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 846707af..8c5946ba 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -22,9 +22,134 @@ module psb_d_oacc_ell_mat_mod
procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
procedure, pass(a) :: sync => d_oacc_ell_sync
procedure, pass(a) :: free => d_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_d_oacc_ell_mold
end type psb_d_oacc_ell_sparse_mat
+ interface
+ module subroutine psb_d_oacc_ell_mold(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_mold
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_csmm
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_csmv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_scals(d, a, info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_scals
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_scal(d,a,info,side)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_d_oacc_ell_scal
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_reallocate_nz(nz,a)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_d_oacc_ell_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_d_oacc_ell_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_cp_from_coo
+ end interface
+
contains
subroutine d_oacc_ell_free(a)
From e6fa1d17a2f134b792f7968d2a95bb61a4a95e84 Mon Sep 17 00:00:00 2001
From: tloloum
Date: Tue, 30 Jul 2024 14:25:01 +0200
Subject: [PATCH 21/86] oacc hll
---
openacc/Makefile | 7 +-
openacc/impl/Makefile | 13 +
openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 53 +++
openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 85 +++++
openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 24 ++
openacc/impl/psb_d_oacc_hll_csmm.F90 | 86 +++++
openacc/impl/psb_d_oacc_hll_csmv.F90 | 83 +++++
openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 | 86 +++++
openacc/impl/psb_d_oacc_hll_mold.F90 | 34 ++
openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 25 ++
openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 24 ++
openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 | 29 ++
openacc/impl/psb_d_oacc_hll_scal.F90 | 59 +++
openacc/impl/psb_d_oacc_hll_scals.F90 | 39 ++
openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 67 ++++
openacc/psb_d_oacc_hll_mat_mod.F90 | 352 ++++++++++++++++++
16 files changed, 1063 insertions(+), 3 deletions(-)
create mode 100644 openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_csmm.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_csmv.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_mold.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_scal.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_scals.F90
create mode 100644 openacc/impl/psb_d_oacc_hll_vect_mv.F90
create mode 100644 openacc/psb_d_oacc_hll_mat_mod.F90
diff --git a/openacc/Makefile b/openacc/Makefile
index cba6b1f7..e7e99072 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -22,7 +22,7 @@ FOBJS= psb_i_oacc_vect_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
- psb_d_oacc_ell_mat_mod.o \
+ psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o\
psb_oacc_mod.o psb_oacc_env_mod.o
@@ -49,7 +49,7 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
- psb_d_oacc_ell_mat_mod.o \
+ psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
psb_oacc_env_mod.o
psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
@@ -61,10 +61,11 @@ psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o
psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o
psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o
psb_d_oacc_ell_mat_mod.o: psb_d_oacc_vect_mod.o
+psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o
clean: cclean iclean
- /bin/rm -f $(FOBJS) *$(.mod) *.a
+ /bin/rm -f $(FOBJS) *$(.mod) *.a *.smod
veryclean: clean
cclean:
/bin/rm -f $(COBJS)
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index a38c703d..c0a157e8 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -84,6 +84,19 @@ psb_d_oacc_ell_cp_from_fmt.o \
psb_d_oacc_ell_mv_from_coo.o \
psb_d_oacc_ell_mv_from_fmt.o \
psb_d_oacc_ell_mold.o \
+psb_d_oacc_hll_mold.o \
+psb_d_oacc_hll_mv_from_fmt.o \
+psb_d_oacc_hll_mv_from_coo.o \
+psb_d_oacc_hll_cp_from_fmt.o \
+psb_d_oacc_hll_cp_from_coo.o \
+psb_d_oacc_hll_allocate_mnnz.o \
+psb_d_oacc_hll_reallocate_nz.o \
+psb_d_oacc_hll_scal.o \
+psb_d_oacc_hll_scals.o \
+psb_d_oacc_hll_csmv.o \
+psb_d_oacc_hll_csmm.o \
+psb_d_oacc_hll_inner_vect_sv.o \
+psb_d_oacc_hll_vect_mv.o \
objs: $(OBJS)
diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
new file mode 100644
index 00000000..909ee90b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
@@ -0,0 +1,53 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_d_hll_sparse_mat%allocate(m, n, nz_)
+
+ hksz = a%hksz
+ nhacks = (m + hksz - 1) / hksz
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_ * m))
+ allocate(a%ja(nz_ * m))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ allocate(a%hkoffs(nhacks))
+ end if
+
+ a%val = 0.0_psb_dpk_
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+ a%hkoffs = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_hll_allocate_mnnz
+end submodule psb_d_oacc_hll_allocate_mnnz_impl
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
new file mode 100644
index 00000000..fbe793d5
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
@@ -0,0 +1,85 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ real(psb_dpk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 32 ! Assuming a default hack size of 32
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_per_row * a%get_nrows()))
+ allocate(a%ja(nz_per_row * a%get_nrows()))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
+ end if
+ a%val = 0.0_psb_dpk_
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
+ a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(j + (i - 1) * nz_per_row) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ ! Calculate hkoffs for HLL format
+ !$acc parallel loop present(a)
+ do i = 1, size(a%hkoffs)
+ a%hkoffs(i) = (i - 1) * hacksize
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_d_oacc_hll_cp_from_coo
+end submodule psb_d_oacc_hll_cp_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
new file mode 100644
index 00000000..fb99737c
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_d_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_d_oacc_hll_cp_from_fmt
+end submodule psb_d_oacc_hll_cp_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_hll_csmm.F90 b/openacc/impl/psb_d_oacc_hll_csmm.F90
new file mode 100644
index 00000000..2dd6b53b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_hll_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do j = 1, nxy
+ do k = 1, nhacks
+ do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
+ y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_hll_csmm
+end submodule psb_d_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90
new file mode 100644
index 00000000..504cad19
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_csmv.F90
@@ -0,0 +1,83 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(in) :: x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_hll_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_hll_csmv
+end submodule psb_d_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
new file mode 100644
index 00000000..ae1c3c94
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
@@ -0,0 +1,86 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ real(psb_dpk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'd_oacc_hll_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nhacks, hksz
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_d_vect_oacc)
+ select type(yy => y)
+ type is (psb_d_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i+1) - 1
+ yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'hll_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_d_oacc_hll_inner_vect_sv
+end submodule psb_d_oacc_hll_inner_vect_sv_impl
diff --git a/openacc/impl/psb_d_oacc_hll_mold.F90 b/openacc/impl/psb_d_oacc_hll_mold.F90
new file mode 100644
index 00000000..89ead65b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_mold(a, b, info)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'hll_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_d_oacc_hll_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_d_oacc_hll_mold
+end submodule psb_d_oacc_hll_mold_impl
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
new file mode 100644
index 00000000..7bf22c13
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_d_oacc_hll_mv_from_coo
+end submodule psb_d_oacc_hll_mv_from_coo_impl
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
new file mode 100644
index 00000000..e6615365
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_d_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_d_oacc_hll_mv_from_fmt
+end submodule psb_d_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
new file mode 100644
index 00000000..412409d1
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
@@ -0,0 +1,29 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='d_oacc_hll_reallocate_nz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_d_hll_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_hll_reallocate_nz
+end submodule psb_d_oacc_hll_reallocate_nz_impl
diff --git a/openacc/impl/psb_d_oacc_hll_scal.F90 b/openacc/impl/psb_d_oacc_hll_scal.F90
new file mode 100644
index 00000000..50210f4b
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_scal.F90
@@ -0,0 +1,59 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_scal(d, a, info, side)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
+ a%val(j) = a%val(j) * d(k)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_hll_scal
+end submodule psb_d_oacc_hll_scal_impl
diff --git a/openacc/impl/psb_d_oacc_hll_scals.F90 b/openacc/impl/psb_d_oacc_hll_scals.F90
new file mode 100644
index 00000000..ccb6b1b8
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_scals.F90
@@ -0,0 +1,39 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_scals(d, a, info)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_d_oacc_hll_scals
+end submodule psb_d_oacc_hll_scals_impl
diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
new file mode 100644
index 00000000..875b646f
--- /dev/null
+++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
@@ -0,0 +1,67 @@
+submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nhacks, hksz
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, nhacks, hksz
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), hkoffs(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, idx, k
+ real(psb_dpk_) :: tmp
+
+ info = 0
+
+ !$acc parallel loop present(val, ja, hkoffs, x, y)
+ do i = 1, nhacks
+ do k = 0, hksz - 1
+ idx = hkoffs(i) + k
+ if (idx <= hkoffs(i + 1) - 1) then
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
+ if (ja(j) > 0) then
+ tmp = tmp + val(j) * x(ja(j))
+ end if
+ end do
+ y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
+ end if
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_d_oacc_hll_vect_mv
+end submodule psb_d_oacc_hll_vect_mv_impl
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
new file mode 100644
index 00000000..530af94a
--- /dev/null
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -0,0 +1,352 @@
+module psb_d_oacc_hll_mat_mod
+ use iso_c_binding
+ use psb_d_mat_mod
+ use psb_d_hll_mat_mod
+ use psb_d_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => d_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => d_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => d_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => d_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => d_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_hll_sync_space
+ procedure, pass(a) :: sync => d_oacc_hll_sync
+ procedure, pass(a) :: free => d_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_d_oacc_hll_mold
+
+ end type psb_d_oacc_hll_sparse_mat
+
+ interface
+ module subroutine psb_d_oacc_hll_mold(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_mold
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_csmm
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_csmv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_scals(d, a, info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_scals
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_scal(d,a,info,side)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_d_oacc_hll_scal
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_reallocate_nz(nz,a)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_d_oacc_hll_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_d_oacc_hll_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_cp_from_coo
+ end interface
+
+ contains
+
+ subroutine d_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ !$acc exit data delete(a%hkoffs)
+ end if
+
+ call a%psb_d_hll_sparse_mat%free()
+
+ return
+ end subroutine d_oacc_hll_free
+
+ function d_oacc_hll_sizeof(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+ res = res + psb_sizeof_ip * size(a%hkoffs)
+ end function d_oacc_hll_sizeof
+
+
+
+ function d_oacc_hll_is_host(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function d_oacc_hll_is_host
+
+ function d_oacc_hll_is_sync(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function d_oacc_hll_is_sync
+
+ function d_oacc_hll_is_dev(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function d_oacc_hll_is_dev
+
+ subroutine d_oacc_hll_set_host(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine d_oacc_hll_set_host
+
+ subroutine d_oacc_hll_set_sync(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine d_oacc_hll_set_sync
+
+ subroutine d_oacc_hll_set_dev(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine d_oacc_hll_set_dev
+
+ function d_oacc_hll_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'HLL_oacc'
+ end function d_oacc_hll_get_fmt
+
+ subroutine d_oacc_hll_sync_space(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call d_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ call i_oacc_create_dev_scalar(a%hkoffs)
+ end if
+
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+
+ end subroutine d_oacc_hll_sync_space
+
+
+ subroutine d_oacc_hll_sync(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a
+ class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call d_oacc_hll_to_host(a%val)
+ call i_oacc_hll_to_host(a%ja)
+ call i_oacc_hll_to_host_scalar(a%irn)
+ call i_oacc_hll_to_host_scalar(a%idiag)
+ call i_oacc_hll_to_host_scalar(a%hkoffs)
+ else if (a%is_host()) then
+ call d_oacc_hll_to_dev(a%val)
+ call i_oacc_hll_to_dev(a%ja)
+ call i_oacc_hll_to_dev_scalar(a%irn)
+ call i_oacc_hll_to_dev_scalar(a%idiag)
+ call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ end if
+ call tmpa%set_sync()
+ end subroutine d_oacc_hll_sync
+
+ subroutine d_oacc_hll_to_host(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_hll_to_host
+
+ subroutine d_oacc_hll_to_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host
+
+ subroutine i_oacc_hll_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host_scalar
+
+ subroutine i_oacc_hll_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev_scalar
+
+
+end module psb_d_oacc_hll_mat_mod
\ No newline at end of file
From 55d1067ec2bed450305073a174b4364ce590129e Mon Sep 17 00:00:00 2001
From: tloloum
Date: Tue, 6 Aug 2024 11:42:52 +0200
Subject: [PATCH 22/86] collapse loop
---
openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 2 +-
openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 2 +-
openacc/impl/psb_d_oacc_hll_csmv.F90 | 5 ++++-
openacc/impl/psb_d_oacc_hll_scal.F90 | 9 ++++++---
openacc/impl/psb_d_oacc_hll_scals.F90 | 3 ++-
5 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
index 560d3451..4e8402e7 100644
--- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
@@ -47,7 +47,7 @@ contains
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
- goto 9999
+ !goto 9999
end if
end do
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
index fbe793d5..4a258c74 100644
--- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
@@ -48,7 +48,7 @@ contains
row_counts(row) = row_counts(row) + 1
else
info = psb_err_invalid_mat_state_
- goto 9999
+ !goto 9999
end if
end do
diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90
index 504cad19..bccd3d6d 100644
--- a/openacc/impl/psb_d_oacc_hll_csmv.F90
+++ b/openacc/impl/psb_d_oacc_hll_csmv.F90
@@ -64,8 +64,11 @@ contains
do i = 1, m
y(i) = beta * y(i)
end do
-
+#if (__GNUC__ <= 13)
+ !$acc parallel loop present(a, x, y)
+#else
!$acc parallel loop collapse(2) present(a, x, y)
+#endif
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
diff --git a/openacc/impl/psb_d_oacc_hll_scal.F90 b/openacc/impl/psb_d_oacc_hll_scal.F90
index 50210f4b..c0c284ef 100644
--- a/openacc/impl/psb_d_oacc_hll_scal.F90
+++ b/openacc/impl/psb_d_oacc_hll_scal.F90
@@ -23,7 +23,8 @@ contains
if (present(side)) then
if (side == 'L') then
- !$acc parallel loop collapse(2) present(a, d)
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
@@ -31,7 +32,8 @@ contains
end do
end do
else if (side == 'R') then
- !$acc parallel loop collapse(2) present(a, d)
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d(a%ja(j))
@@ -39,7 +41,8 @@ contains
end do
end if
else
- !$acc parallel loop collapse(2) present(a, d)
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
diff --git a/openacc/impl/psb_d_oacc_hll_scals.F90 b/openacc/impl/psb_d_oacc_hll_scals.F90
index ccb6b1b8..1e3457b5 100644
--- a/openacc/impl/psb_d_oacc_hll_scals.F90
+++ b/openacc/impl/psb_d_oacc_hll_scals.F90
@@ -20,7 +20,8 @@ contains
nhacks = (a%get_nrows() + hksz - 1) / hksz
nzt = a%nzt
- !$acc parallel loop collapse(2) present(a)
+ ! $ a parallel loop collapse(2) present(a)
+ !$acc parallel loop present(a)
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
a%val(j) = a%val(j) * d
From a28d3b048b38248729dd9d59388553138cb71fb3 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 7 Aug 2024 10:30:11 +0200
Subject: [PATCH 23/86] Fix configure for OpenACC (added warning message)
---
config/pac.m4 | 50 --------------------------------------------------
configure | 51 ++++++++++++++++++++++++++++++++++++++++++++++++++-
configure.ac | 12 +++++++++++-
3 files changed, 61 insertions(+), 52 deletions(-)
diff --git a/config/pac.m4 b/config/pac.m4
index c23de4fb..83356540 100644
--- a/config/pac.m4
+++ b/config/pac.m4
@@ -2234,56 +2234,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo
[pac_cv_cudacc=''])
])
-AC_DEFUN(PAC_ARG_WITH_LIBRSB,
- [SAVE_LIBS="$LIBS"
- SAVE_CPPFLAGS="$CPPFLAGS"
-
- AC_ARG_WITH(librsb,
- AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
- --with-librsb=/opt/packages/librsb]),
- [pac_cv_librsb_dir=$withval],
- [pac_cv_librsb_dir=''])
-
- if test "x$pac_cv_librsb_dir" != "x"; then
- LIBS="-L$pac_cv_librsb_dir $LIBS"
- RSB_INCLUDES="-I$pac_cv_librsb_dir"
- # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
- RSB_LIBDIR="-L$pac_cv_librsb_dir"
- fi
- #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
- AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
- [pac_rsb_header_ok=yes],
- [pac_rsb_header_ok=no; RSB_INCLUDES=""])
-
- if test "x$pac_rsb_header_ok" == "xyes" ; then
- RSB_LIBS="-lrsb $RSB_LIBDIR"
- # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
- # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
- # AC_TRY_LINK_FUNC(spgpuCreate,
- # [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
- # [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
- # AC_MSG_RESULT($pac_gpu_lib_ok)
- # if test "x$pac_cv_have_spgpu" == "xyes" ; then
- # AC_MSG_NOTICE([Have found SPGPU])
- RSBLIBNAME="librsb.a";
- LIBRSB_DIR="$pac_cv_librsb_dir";
- # SPGPU_DEFINES="-DHAVE_SPGPU";
- LIBRSB_INCDIR="$LIBRSB_DIR";
- LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
- LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
- # CUDA_DIR="$pac_cv_cuda_dir";
- LIBRSB_DEFINES="-DHAVE_RSB";
- LRSB=-lpsb_rsb
- # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
- # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
- FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
- CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
- fi
-# fi
-LIBS="$SAVE_LIBS"
-CPPFLAGS="$SAVE_CPPFLAGS"
-])
-dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl
diff --git a/configure b/configure
index 1c3c80c6..04408e2a 100755
--- a/configure
+++ b/configure
@@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE
LTLIBOBJS
LIBOBJS
+LRSB
+LIBRSB_DEFINES
+LIBRSB_DIR
+LIBRSB_INCDIR
+LIBRSB_INCLUDES
+LIBRSB_LIBS
LCUDA
CUDALD
CUDAD
@@ -5841,7 +5847,7 @@ fi
# we just gave the user the chance to append values to these variables
-###############################################################################
+
if test -n "$ac_tool_prefix"; then
@@ -10985,6 +10991,27 @@ fi
#FLINK="$FLINK $FCOPENACC";
fi
+###############################################################################
+LIBRSB_DIR="$pac_cv_librsb_dir";
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5
+printf %s "checking for LIBRSB install dir... " >&6; }
+case $LIBRSB_DIR in
+ /*) ;;
+ *) esac
+pac_cv_status_file="$LIBRSB_DIR/librsb.a"
+if test ! -f "$pac_cv_status_file" ; then
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+ #AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.])
+else
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5
+printf "%s\n" "$LIBRSB_DIR" >&6; }
+ RSBTARGETLIB=rsbd;
+ RSBTARGETOBJ=rsbobj;
+fi
+
+
+
###############################################################################
@@ -11082,6 +11109,12 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+
+
+
+
+
+
@@ -12422,6 +12455,9 @@ fi
CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc}
+ OPENACC : ${pac_cv_openacc}
+ FCOPENACC : ${FCOPENACC}
+
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
@@ -12455,6 +12491,9 @@ printf "%s\n" "$as_me:
CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc}
+ OPENACC : ${pac_cv_openacc}
+ FCOPENACC : ${FCOPENACC}
+
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
@@ -12471,6 +12510,16 @@ printf "%s\n" "$as_me:
If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise
type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}.
" >&6;}
+if test x"${pac_cv_openacc}" == x"yes" ; then
+ if test x"${FCOPENACC}" == x ; then
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}:
+ WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
+ You may want to rerun configure with --with-fcopenacc= " >&5
+printf "%s\n" "$as_me:
+ WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
+ You may want to rerun configure with --with-fcopenacc= " >&6;}
+ fi
+fi
###############################################################################
diff --git a/configure.ac b/configure.ac
index c5a19006..47b41406 100755
--- a/configure.ac
+++ b/configure.ac
@@ -860,7 +860,7 @@ if test x"$pac_cv_openacc" == x"yes" ; then
fi
###############################################################################
-PAC_ARG_WITH_LIBRSB
+dnl PAC_ARG_WITH_LIBRSB()
LIBRSB_DIR="$pac_cv_librsb_dir";
AC_MSG_CHECKING([for LIBRSB install dir])
case $LIBRSB_DIR in
@@ -1023,6 +1023,9 @@ AC_MSG_NOTICE([
CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc}
+ OPENACC : ${pac_cv_openacc}
+ FCOPENACC : ${FCOPENACC}
+
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
@@ -1042,6 +1045,13 @@ dnl Note : we should use LDLIBS sooner or later!
dnl To install the program and its documentation, run 'make install' if you are root,
dnl or run 'su -c "make install"' if you are not root.
])
+if test x"${pac_cv_openacc}" == x"yes" ; then
+ if test x"${FCOPENACC}" == x ; then
+ AC_MSG_NOTICE([
+ WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
+ You may want to rerun configure with --with-fcopenacc= ])
+ fi
+fi
###############################################################################
From 464baceb139735f2e28289f5ecabfa6400762ebd Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 7 Aug 2024 10:54:19 +0200
Subject: [PATCH 24/86] HLL loop nest cannot run with collapse
---
openacc/impl/psb_d_oacc_hll_csmv.F90 | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90
index bccd3d6d..d38fca61 100644
--- a/openacc/impl/psb_d_oacc_hll_csmv.F90
+++ b/openacc/impl/psb_d_oacc_hll_csmv.F90
@@ -64,11 +64,9 @@ contains
do i = 1, m
y(i) = beta * y(i)
end do
-#if (__GNUC__ <= 13)
+ ! This loop nest cannot be run with collapse, since
+ ! the inner loop extent varies.
!$acc parallel loop present(a, x, y)
-#else
- !$acc parallel loop collapse(2) present(a, x, y)
-#endif
do i = 1, nhacks
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
From 03aaa090db31ada5478a0a8bd4af07f339bab657 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 7 Aug 2024 17:04:04 +0200
Subject: [PATCH 25/86] New AX_OPENACC macro and supporting flags
---
config/ax_c_openacc.m4 | 104 +++++++++++
config/ax_cxx_openacc.m4 | 104 +++++++++++
config/ax_fc_openacc.m4 | 108 +++++++++++
configure | 286 +++++++++++++++++++++++++++++-
configure.ac | 32 +++-
test/openacc/Makefile | 11 +-
test/openacc/psb_d_oacc_pde3d.F90 | 3 +-
7 files changed, 639 insertions(+), 9 deletions(-)
create mode 100644 config/ax_c_openacc.m4
create mode 100644 config/ax_cxx_openacc.m4
create mode 100644 config/ax_fc_openacc.m4
diff --git a/config/ax_c_openacc.m4 b/config/ax_c_openacc.m4
new file mode 100644
index 00000000..52f645a9
--- /dev/null
+++ b/config/ax_c_openacc.m4
@@ -0,0 +1,104 @@
+# AC_OPENACC
+# ---------
+# Check which options need to be passed to the C compiler to support Openacc.
+# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
+# options.
+# The options are necessary at compile time (so the #pragmas are understood)
+# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $CC $CFLAGS already
+# supports Openacc.
+#
+# For each candidate option, we do a compile test first, then a link test;
+# if the compile test succeeds but the link test fails, that means we have
+# found the correct option but it doesn't work because the libraries are
+# broken. (This can happen, for instance, with SunPRO C and a bad combination
+# of operating system patches.)
+#
+# Several of the options in our candidate list can be misinterpreted by
+# compilers that don't use them to activate Openacc support; for example,
+# many compilers understand "-openacc" to mean "write output to a file
+# named 'penmp'" rather than "enable Openacc". We can't completely avoid
+# the possibility of clobbering files named 'penmp' or 'mp' in configure's
+# working directory; therefore, this macro will bomb out if any such file
+# already exists when it's invoked.
+AC_DEFUN([AX_C_OPENACC],
+[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
+[AC_ARG_ENABLE([openacc],
+ [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
+[
+ OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
+ if test "$enable_openacc" != no; then
+ AC_LANG_PUSH([C])
+ AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
+ dnl Try these flags:
+ dnl (on by default) ''
+ dnl GCC >= 4.2 -fopenacc
+ dnl SunPRO C -xopenacc
+ dnl Intel C -openacc
+ dnl SGI C, PGI C -mp
+ dnl Tru64 Compaq C -omp
+ dnl IBM XL C (AIX, Linux) -qsmp=omp
+ dnl Cray CCE -homp
+ dnl NEC SX -Popenacc
+ dnl Lahey Fortran (Linux) --openacc
+ for ac_option in '' -fopenacc -openacc -acc; do
+
+ ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+ _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
+ AC_COMPILE_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (0); return 0;}
+],
+ [AC_LINK_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (0); return 0;}
+],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
+ _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
+
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
+ break
+ fi
+ done
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
+ ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
+ elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
+ ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
+ fi
+ dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
+ dnl started probing for Openacc support, so if they exist now, they were
+ dnl created by the probe loop and it's safe to delete them.
+ rm -f penmp mp])
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
+ test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
+ OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
+ fi
+ AC_LANG_POP([C])
+ fi
+])
+
+# _AC_OPENACC_SAFE_WD
+# ------------------
+# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
+# configure time for files that AC_OPENACC clobbers.
+AC_DEFUN([_AX_OPENACC_SAFE_WD],
+[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
+[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
+ [AX_OPENACC clobbers files named 'mp' and 'penmp'.
+ To use AX_OPENACC you must not have either of these files
+ at the top level of your source tree.]))])]dnl
+[if test -e penmp || test -e mp; then
+ AC_MSG_ERROR(m4_normalize(
+ [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
+ Aborting configure because one of these files already exists.]))
+fi])
+
diff --git a/config/ax_cxx_openacc.m4 b/config/ax_cxx_openacc.m4
new file mode 100644
index 00000000..5a2ad278
--- /dev/null
+++ b/config/ax_cxx_openacc.m4
@@ -0,0 +1,104 @@
+# AC_OPENACC
+# ---------
+# Check which options need to be passed to the C compiler to support Openacc.
+# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
+# options.
+# The options are necessary at compile time (so the #pragmas are understood)
+# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $CC $CFLAGS already
+# supports Openacc.
+#
+# For each candidate option, we do a compile test first, then a link test;
+# if the compile test succeeds but the link test fails, that means we have
+# found the correct option but it doesn't work because the libraries are
+# broken. (This can happen, for instance, with SunPRO C and a bad combination
+# of operating system patches.)
+#
+# Several of the options in our candidate list can be misinterpreted by
+# compilers that don't use them to activate Openacc support; for example,
+# many compilers understand "-openacc" to mean "write output to a file
+# named 'penmp'" rather than "enable Openacc". We can't completely avoid
+# the possibility of clobbering files named 'penmp' or 'mp' in configure's
+# working directory; therefore, this macro will bomb out if any such file
+# already exists when it's invoked.
+AC_DEFUN([AX_CXX_OPENACC],
+[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
+[AC_ARG_ENABLE([openacc],
+ [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
+[
+ OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
+ if test "$enable_openacc" != no; then
+ AC_LANG_PUSH([C++])
+ AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
+ dnl Try these flags:
+ dnl (on by default) ''
+ dnl GCC >= 4.2 -fopenacc
+ dnl SunPRO C -xopenacc
+ dnl Intel C -openacc
+ dnl SGI C, PGI C -mp
+ dnl Tru64 Compaq C -omp
+ dnl IBM XL C (AIX, Linux) -qsmp=omp
+ dnl Cray CCE -homp
+ dnl NEC SX -Popenacc
+ dnl Lahey Fortran (Linux) --openacc
+ for ac_option in '' -fopenacc -openacc -acc; do
+
+ ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+ _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
+ AC_COMPILE_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+],
+ [AC_LINK_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
+ _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
+
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
+ break
+ fi
+ done
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
+ ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
+ elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
+ ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
+ fi
+ dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
+ dnl started probing for Openacc support, so if they exist now, they were
+ dnl created by the probe loop and it's safe to delete them.
+ rm -f penmp mp])
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
+ test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
+ OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
+ fi
+ AC_LANG_POP([C++])
+ fi
+])
+
+dnl _AC_OPENACC_SAFE_WD
+dnl ------------------
+dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
+dnl configure time for files that AC_OPENACC clobbers.
+dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
+dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
+dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
+dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'.
+dnl To use AX_OPENACC you must not have either of these files
+dnl at the top level of your source tree.]))])]dnl
+dnl [if test -e penmp || test -e mp; then
+dnl AC_MSG_ERROR(m4_normalize(
+dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
+dnl Aborting configure because one of these files already exists.]))
+dnl fi])
+
diff --git a/config/ax_fc_openacc.m4 b/config/ax_fc_openacc.m4
new file mode 100644
index 00000000..59775b2b
--- /dev/null
+++ b/config/ax_fc_openacc.m4
@@ -0,0 +1,108 @@
+# AC_OPENACC
+# ---------
+# Check which options need to be passed to the C compiler to support Openacc.
+# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
+# options.
+# The options are necessary at compile time (so the #pragmas are understood)
+# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $CC $CFLAGS already
+# supports Openacc.
+#
+# For each candidate option, we do a compile test first, then a link test;
+# if the compile test succeeds but the link test fails, that means we have
+# found the correct option but it doesn't work because the libraries are
+# broken. (This can happen, for instance, with SunPRO C and a bad combination
+# of operating system patches.)
+#
+# Several of the options in our candidate list can be misinterpreted by
+# compilers that don't use them to activate Openacc support; for example,
+# many compilers understand "-openacc" to mean "write output to a file
+# named 'penmp'" rather than "enable Openacc". We can't completely avoid
+# the possibility of clobbering files named 'penmp' or 'mp' in configure's
+# working directory; therefore, this macro will bomb out if any such file
+# already exists when it's invoked.
+AC_DEFUN([AX_FC_OPENACC],
+[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
+[AC_ARG_ENABLE([openacc],
+ [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
+[
+ OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
+ if test "$enable_openacc" != no; then
+ AC_LANG_PUSH([Fortran])
+ AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
+ dnl Try these flags:
+ dnl (on by default) ''
+ dnl GCC >= 4.2 -fopenacc
+ dnl SunPRO C -xopenacc
+ dnl Intel C -openacc
+ dnl SGI C, PGI C -mp
+ dnl Tru64 Compaq C -omp
+ dnl IBM XL C (AIX, Linux) -qsmp=omp
+ dnl Cray CCE -homp
+ dnl NEC SX -Popenacc
+ dnl Lahey Fortran (Linux) --openacc
+ for ac_option in '' -fopenacc -openacc -acc; do
+
+ ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+ _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
+ AC_COMPILE_IFELSE([
+ program main
+ use openacc
+ implicit none
+ integer tid, np
+ tid = 42
+ call acc_init(0)
+ end
+],
+ [AC_LINK_IFELSE([
+ program main
+ use openacc
+ implicit none
+ integer tid, np
+ tid = 42
+ call acc_init(0)
+ end
+],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
+ [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
+ _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
+
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported'; then
+ break
+ fi
+ done
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
+ ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
+ elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
+ ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
+ fi
+ dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
+ dnl started probing for Openacc support, so if they exist now, they were
+ dnl created by the probe loop and it's safe to delete them.
+ rm -f penmp mp])
+ if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
+ test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
+ OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
+ fi
+ AC_LANG_POP([Fortran])
+ fi
+])
+
+# _AC_OPENACC_SAFE_WD
+# ------------------
+# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
+# configure time for files that AC_OPENACC clobbers.
+AC_DEFUN([_AX_OPENACC_SAFE_WD],
+[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
+[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
+ [AX_OPENACC clobbers files named 'mp' and 'penmp'.
+ To use AX_OPENACC you must not have either of these files
+ at the top level of your source tree.]))])]dnl
+[if test -e penmp || test -e mp; then
+ AC_MSG_ERROR(m4_normalize(
+ [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
+ Aborting configure because one of these files already exists.]))
+fi])
+
diff --git a/configure b/configure
index 04408e2a..3022ff53 100755
--- a/configure
+++ b/configure
@@ -852,6 +852,7 @@ with_amdlibdir
with_cuda
with_cudacc
enable_openacc
+with_extraopenacc
with_ccopenacc
with_cxxopenacc
with_fcopenacc
@@ -1506,6 +1507,7 @@ Optional Features:
--enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP
--enable-openacc Specify whether to enable openacc.
+ --disable-openacc do not use Openacc
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -1551,6 +1553,8 @@ Optional Packages:
--with-cuda=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75
+ --with-extraopenacc additional [EXTRAOPENACC] flags to be added: will
+ prepend to [EXTRAOPENACC]
--with-ccopenacc additional [CCOPENACC] flags to be added: will
prepend to [CCOPENACC]
--with-cxxopenacc additional [CXXOPENACC] flags to be added: will
@@ -10925,6 +10929,284 @@ fi
if test x"$pac_cv_openacc" == x"yes" ; then
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-extraopenacc was given.
+if test ${with_extraopenacc+y}
+then :
+ withval=$with_extraopenacc;
+EXTRAOPENACC="${withval} ${EXTRAOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5
+printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; }
+
+else $as_nop
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+ if test -e penmp || test -e mp; then
+ as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5
+fi
+# Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+ enableval=$enable_openacc;
+fi
+
+ OPENACC_CFLAGS=
+ if test "$enable_openacc" != no; then
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5
+printf %s "checking for $CC option to support Openacc... " >&6; }
+if test ${ax_cv_prog_c_openacc+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+ ax_cv_prog_c_openacc='not found'
+ for ac_option in '' -fopenacc -openacc -acc; do
+
+ ac_save_CFLAGS=$CFLAGS
+ CFLAGS="$CFLAGS $ac_option"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (0); return 0;}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (0); return 0;}
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+ ax_cv_prog_c_openacc=$ac_option
+else $as_nop
+ ax_cv_prog_c_openacc='unsupported'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+ CFLAGS=$ac_save_CFLAGS
+
+ if test "$ax_cv_prog_c_openacc" != 'not found'; then
+ break
+ fi
+ done
+ if test "$ax_cv_prog_c_openacc" = 'not found'; then
+ ac_cv_prog_c_openacc='unsupported'
+ elif test "$ax_cv_prog_c_openacc" = ''; then
+ ac_cv_prog_c_openacc='none needed'
+ fi
+ rm -f penmp mp
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5
+printf "%s\n" "$ax_cv_prog_c_openacc" >&6; }
+ if test "$ax_cv_prog_c_openacc" != 'unsupported' && \
+ test "$ax_cv_prog_c_openacc" != 'none needed'; then
+ OPENACC_CFLAGS="$ax_cv_prog_c_openacc"
+ fi
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ fi
+
+ CCOPENACC="$ax_cv_prog_c_openacc";
+ # Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+ enableval=$enable_openacc;
+fi
+
+ OPENACC_CFLAGS=
+ if test "$enable_openacc" != no; then
+ ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5
+printf %s "checking for $CXX option to support Openacc... " >&6; }
+if test ${ax_cv_prog_cxx_openacc+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+ ax_cv_prog_cxx_openacc='not found'
+ for ac_option in '' -fopenacc -openacc -acc; do
+
+ ac_save_CXXFLAGS=$CXXFLAGS
+ CXXFLAGS="$CXXFLAGS $ac_option"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"
+then :
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"
+then :
+ ax_cv_prog_cxx_openacc=$ac_option
+else $as_nop
+ ax_cv_prog_cxx_openacc='unsupported'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+ CXXFLAGS=$ac_save_CXXFLAGS
+
+ if test "$ax_cv_prog_cxx_openacc" != 'not found'; then
+ break
+ fi
+ done
+ if test "$ax_cv_prog_cxx_openacc" = 'not found'; then
+ ac_cv_prog_cxx_openacc='unsupported'
+ elif test "$ax_cv_prog_cxx_openacc" = ''; then
+ ac_cv_prog_cxx_openacc='none needed'
+ fi
+ rm -f penmp mp
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5
+printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; }
+ if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \
+ test "$ax_cv_prog_cxx_openacc" != 'none needed'; then
+ OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc"
+ fi
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ fi
+
+ CXXOPENACC="$ax_cv_prog_cxx_openacc";
+ # Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+ enableval=$enable_openacc;
+fi
+
+ OPENACC_CFLAGS=
+ if test "$enable_openacc" != no; then
+ ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5
+printf %s "checking for $FC option to support Openacc... " >&6; }
+if test ${ax_cv_prog_fc_openacc+y}
+then :
+ printf %s "(cached) " >&6
+else $as_nop
+ ax_cv_prog_fc_openacc='not found'
+ for ac_option in '' -fopenacc -openacc -acc; do
+
+ ac_save_FCFLAGS=$FCFLAGS
+ FCFLAGS="$FCFLAGS $ac_option"
+ cat > conftest.$ac_ext <<_ACEOF
+
+ program main
+ use openacc
+ implicit none
+ integer tid, np
+ tid = 42
+ call acc_init(0)
+ end
+
+_ACEOF
+if ac_fn_fc_try_compile "$LINENO"
+then :
+ cat > conftest.$ac_ext <<_ACEOF
+
+ program main
+ use openacc
+ implicit none
+ integer tid, np
+ tid = 42
+ call acc_init(0)
+ end
+
+_ACEOF
+if ac_fn_fc_try_link "$LINENO"
+then :
+ ax_cv_prog_fc_openacc=$ac_option
+else $as_nop
+ ax_cv_prog_fc_openacc='unsupported'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+ conftest$ac_exeext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+ FCFLAGS=$ac_save_FCFLAGS
+
+ if test "$ax_cv_prog_fc_openacc" != 'unsupported'; then
+ break
+ fi
+ done
+ if test "$ax_cv_prog_fc_openacc" = 'not found'; then
+ ac_cv_prog_fc_openacc='unsupported'
+ elif test "$ax_cv_prog_fc_openacc" = ''; then
+ ac_cv_prog_fc_openacc='none needed'
+ fi
+ rm -f penmp mp
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5
+printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; }
+ if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \
+ test "$ax_cv_prog_fc_openacc" != 'none needed'; then
+ OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc"
+ fi
+ ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+ fi
+
+ FCOPENACC="$ax_cv_prog_fc_openacc";
+
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
@@ -10981,7 +11263,9 @@ printf "%s\n" "no" >&6; }
fi
-
+ CCOPENACC="$CCOPENACC $EXTRAOPENACC";
+ CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
+ FCOPENACC="$FCOPENACC $EXTRAOPENACC";
OACCD=oaccd;
OACCLD=oaccld;
diff --git a/configure.ac b/configure.ac
index 47b41406..c702d4b1 100755
--- a/configure.ac
+++ b/configure.ac
@@ -845,11 +845,35 @@ if test "x$pac_cv_ipk_size" != "x4"; then
fi
PAC_ARG_OPENACC()
+dnl AC_ARG_ENABLE([openacc],
+dnl [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])
if test x"$pac_cv_openacc" == x"yes" ; then
- PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
+ PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC)
+ dnl if test false; then
+ AX_C_OPENACC()
+ CCOPENACC="$ax_cv_prog_c_openacc";
+ AX_CXX_OPENACC()
+ CXXOPENACC="$ax_cv_prog_cxx_openacc";
+ AX_FC_OPENACC()
+ FCOPENACC="$ax_cv_prog_fc_openacc";
+ dnl AX_OPENACC()
+ dnl
+ dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";
+ dnl FCOPENACC="$ax_cv_prog_fc_openacc";
+ dnl else
+dnl AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
+ PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
-
+dnl AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
+dnl CCOPENACC="$ax_cv_prog_c_openacc";
+dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";
+dnl FCOPENACC="$ax_cv_prog_fc_openacc";
+dnl fi
+ CCOPENACC="$CCOPENACC $EXTRAOPENACC";
+ CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
+ FCOPENACC="$FCOPENACC $EXTRAOPENACC";
+dnl AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
OACCD=oaccd;
OACCLD=oaccld;
@@ -1025,7 +1049,9 @@ AC_MSG_NOTICE([
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
-
+ OACCD : ${OACCD}
+ OACCLD : ${OACCLD}
+
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
diff --git a/test/openacc/Makefile b/test/openacc/Makefile
index a2827b30..d0bd4880 100644
--- a/test/openacc/Makefile
+++ b/test/openacc/Makefile
@@ -17,14 +17,17 @@ FINCLUDES=$(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FMFLAG). $(FMFLAG)$(PSBMODDIR)
FFLAGS=-O0 -march=native -fopenacc -foffload=nvptx-none="-march=sm_70"
CFLAGS=-O0 -march=native
-SRCS=vectoacc.F90 datavect.F90
+VTC=vectoacc.o
+DVT=datavect.o
CSRC=timers.c
OBJS=$(SRCS:.F90=.o) $(CSRC:.c=.o)
-all: dir $(OBJS)
- $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
- /bin/mv datavect $(EXEDIR)
+all: dir psb_d_oacc_pde3d
+
+#$(OBJS)
+# $(FC) $(FFLAGS) $(OBJS) -o datavect $(FINCLUDES) $(PSBLAS_LIB) $(LDLIBS)
+# /bin/mv datavect $(EXEDIR)
dir:
@if test ! -d $(EXEDIR); then mkdir $(EXEDIR); fi
diff --git a/test/openacc/psb_d_oacc_pde3d.F90 b/test/openacc/psb_d_oacc_pde3d.F90
index ac992884..7c0cb4ef 100644
--- a/test/openacc/psb_d_oacc_pde3d.F90
+++ b/test/openacc/psb_d_oacc_pde3d.F90
@@ -733,7 +733,8 @@ program psb_d_oacc_pde3d
end if
! get parameters
- call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc, itmax, itrace, irst, ipart, parms)
+ call get_parms(ctxt, kmethd, ptype, afmt, agfmt, idim, istopc,&
+ & itmax, itrace, irst, ipart, parms)
! allocate and fill in the coefficient matrix, rhs and initial guess
call psb_barrier(ctxt)
From ff8513b4c60985649da87f4ffeb8d214a5e18ca7 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 7 Aug 2024 17:04:29 +0200
Subject: [PATCH 26/86] Ignore .smod files in git
---
.gitignore | 1 +
1 file changed, 1 insertion(+)
diff --git a/.gitignore b/.gitignore
index 7227f784..d190e4c7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
*.a
*.o
*.mod
+*.smod
*~
# header files generated
From 2982aaee27b501e18b6a8125a263a19f03b65897 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 8 Aug 2024 11:07:28 +0200
Subject: [PATCH 27/86] Implementation in OpenACC for ELL and HLL into
templates. Merge from development
---
base/modules/auxil/psi_c_serial_mod.f90 | 8 +-
base/modules/auxil/psi_d_serial_mod.f90 | 8 +-
base/modules/auxil/psi_e_serial_mod.f90 | 8 +-
base/modules/auxil/psi_i2_serial_mod.f90 | 8 +-
base/modules/auxil/psi_m_serial_mod.f90 | 8 +-
base/modules/auxil/psi_s_serial_mod.f90 | 8 +-
base/modules/auxil/psi_z_serial_mod.f90 | 8 +-
base/modules/psblas/psb_c_psblas_mod.F90 | 8 +-
base/modules/psblas/psb_d_psblas_mod.F90 | 8 +-
base/modules/psblas/psb_s_psblas_mod.F90 | 8 +-
base/modules/psblas/psb_z_psblas_mod.F90 | 8 +-
base/modules/serial/psb_c_base_vect_mod.F90 | 14 +-
base/modules/serial/psb_c_vect_mod.F90 | 8 +-
base/modules/serial/psb_d_base_vect_mod.F90 | 14 +-
base/modules/serial/psb_d_vect_mod.F90 | 8 +-
base/modules/serial/psb_s_base_vect_mod.F90 | 14 +-
base/modules/serial/psb_s_vect_mod.F90 | 8 +-
base/modules/serial/psb_z_base_vect_mod.F90 | 14 +-
base/modules/serial/psb_z_vect_mod.F90 | 8 +-
base/psblas/psb_caxpby.f90 | 8 +-
base/psblas/psb_daxpby.f90 | 8 +-
base/psblas/psb_saxpby.f90 | 8 +-
base/psblas/psb_zaxpby.f90 | 8 +-
base/serial/psi_c_serial_impl.F90 | 8 +-
base/serial/psi_d_serial_impl.F90 | 8 +-
base/serial/psi_e_serial_impl.F90 | 8 +-
base/serial/psi_i2_serial_impl.F90 | 8 +-
base/serial/psi_m_serial_impl.F90 | 8 +-
base/serial/psi_s_serial_impl.F90 | 8 +-
base/serial/psi_z_serial_impl.F90 | 8 +-
cuda/psb_c_cuda_vect_mod.F90 | 8 +-
cuda/psb_c_vectordev_mod.F90 | 8 +-
cuda/psb_d_cuda_vect_mod.F90 | 8 +-
cuda/psb_d_vectordev_mod.F90 | 8 +-
cuda/psb_s_cuda_vect_mod.F90 | 8 +-
cuda/psb_s_vectordev_mod.F90 | 8 +-
cuda/psb_z_cuda_vect_mod.F90 | 8 +-
cuda/psb_z_vectordev_mod.F90 | 8 +-
openacc/Makefile | 27 +-
openacc/impl/Makefile | 84 +-
openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 | 47 +
openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 78 ++
openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 24 +
openacc/impl/psb_c_oacc_ell_csmm.F90 | 86 ++
openacc/impl/psb_c_oacc_ell_csmv.F90 | 82 ++
openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 | 85 ++
openacc/impl/psb_c_oacc_ell_mold.F90 | 34 +
openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 25 +
openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 24 +
openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 | 28 +
openacc/impl/psb_c_oacc_ell_scal.F90 | 58 ++
openacc/impl/psb_c_oacc_ell_scals.F90 | 39 +
openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 66 ++
openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 | 53 +
openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 85 ++
openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 24 +
openacc/impl/psb_c_oacc_hll_csmm.F90 | 86 ++
openacc/impl/psb_c_oacc_hll_csmv.F90 | 84 ++
openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 | 86 ++
openacc/impl/psb_c_oacc_hll_mold.F90 | 34 +
openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 25 +
openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 24 +
openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 | 29 +
openacc/impl/psb_c_oacc_hll_scal.F90 | 62 ++
openacc/impl/psb_c_oacc_hll_scals.F90 | 40 +
openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 67 ++
openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 2 +-
openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 2 +-
openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 2 +-
openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 2 +-
openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 | 47 +
openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 78 ++
openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 24 +
openacc/impl/psb_s_oacc_ell_csmm.F90 | 86 ++
openacc/impl/psb_s_oacc_ell_csmv.F90 | 82 ++
openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 | 85 ++
openacc/impl/psb_s_oacc_ell_mold.F90 | 34 +
openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 25 +
openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 24 +
openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 | 28 +
openacc/impl/psb_s_oacc_ell_scal.F90 | 58 ++
openacc/impl/psb_s_oacc_ell_scals.F90 | 39 +
openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 66 ++
openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 | 53 +
openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 85 ++
openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 24 +
openacc/impl/psb_s_oacc_hll_csmm.F90 | 86 ++
openacc/impl/psb_s_oacc_hll_csmv.F90 | 84 ++
openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 | 86 ++
openacc/impl/psb_s_oacc_hll_mold.F90 | 34 +
openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 25 +
openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 24 +
openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 | 29 +
openacc/impl/psb_s_oacc_hll_scal.F90 | 62 ++
openacc/impl/psb_s_oacc_hll_scals.F90 | 40 +
openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 67 ++
openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 | 47 +
openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 78 ++
openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 24 +
openacc/impl/psb_z_oacc_ell_csmm.F90 | 86 ++
openacc/impl/psb_z_oacc_ell_csmv.F90 | 82 ++
openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 | 85 ++
openacc/impl/psb_z_oacc_ell_mold.F90 | 34 +
openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 25 +
openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 24 +
openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 | 28 +
openacc/impl/psb_z_oacc_ell_scal.F90 | 58 ++
openacc/impl/psb_z_oacc_ell_scals.F90 | 39 +
openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 66 ++
openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 | 53 +
openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 85 ++
openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 24 +
openacc/impl/psb_z_oacc_hll_csmm.F90 | 86 ++
openacc/impl/psb_z_oacc_hll_csmv.F90 | 84 ++
openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 | 86 ++
openacc/impl/psb_z_oacc_hll_mold.F90 | 34 +
openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 25 +
openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 24 +
openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 | 29 +
openacc/impl/psb_z_oacc_hll_scal.F90 | 62 ++
openacc/impl/psb_z_oacc_hll_scals.F90 | 40 +
openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 67 ++
openacc/psb_c_oacc_ell_mat_mod.F90 | 341 +++++++
openacc/psb_c_oacc_hll_mat_mod.F90 | 352 +++++++
openacc/psb_c_oacc_vect_mod.F90 | 33 +-
openacc/psb_d_oacc_ell_mat_mod.F90 | 682 +++++++------
openacc/psb_d_oacc_hll_mat_mod.F90 | 702 +++++++-------
openacc/psb_d_oacc_vect_mod.F90 | 33 +-
openacc/psb_i_oacc_vect_mod.F90 | 916 +++++++++---------
openacc/psb_l_oacc_vect_mod.F90 | 507 ++++++++++
openacc/psb_oacc_mod.F90 | 9 +
openacc/psb_s_oacc_ell_mat_mod.F90 | 341 +++++++
openacc/psb_s_oacc_hll_mat_mod.F90 | 352 +++++++
openacc/psb_s_oacc_vect_mod.F90 | 33 +-
openacc/psb_z_oacc_ell_mat_mod.F90 | 341 +++++++
openacc/psb_z_oacc_hll_mat_mod.F90 | 352 +++++++
openacc/psb_z_oacc_vect_mod.F90 | 33 +-
137 files changed, 8195 insertions(+), 1404 deletions(-)
create mode 100644 openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_csmm.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_csmv.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_mold.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_reallocate_nz.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_scal.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_scals.F90
create mode 100644 openacc/impl/psb_c_oacc_ell_vect_mv.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_csmm.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_csmv.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_mold.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_reallocate_nz.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_scal.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_scals.F90
create mode 100644 openacc/impl/psb_c_oacc_hll_vect_mv.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_csmm.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_csmv.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_mold.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_reallocate_nz.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_scal.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_scals.F90
create mode 100644 openacc/impl/psb_s_oacc_ell_vect_mv.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_csmm.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_csmv.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_mold.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_reallocate_nz.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_scal.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_scals.F90
create mode 100644 openacc/impl/psb_s_oacc_hll_vect_mv.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_csmm.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_csmv.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_mold.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_reallocate_nz.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_scal.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_scals.F90
create mode 100644 openacc/impl/psb_z_oacc_ell_vect_mv.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_csmm.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_csmv.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_mold.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_reallocate_nz.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_scal.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_scals.F90
create mode 100644 openacc/impl/psb_z_oacc_hll_vect_mv.F90
create mode 100644 openacc/psb_c_oacc_ell_mat_mod.F90
create mode 100644 openacc/psb_c_oacc_hll_mat_mod.F90
create mode 100644 openacc/psb_l_oacc_vect_mod.F90
create mode 100644 openacc/psb_s_oacc_ell_mat_mod.F90
create mode 100644 openacc/psb_s_oacc_hll_mat_mod.F90
create mode 100644 openacc/psb_z_oacc_ell_mat_mod.F90
create mode 100644 openacc/psb_z_oacc_hll_mat_mod.F90
diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90
index 3fe001c8..38b740a7 100644
--- a/base/modules/auxil/psi_c_serial_mod.f90
+++ b/base/modules/auxil/psi_c_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_c_serial_mod
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_c_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_cabgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90
index a08263df..1d65c5f6 100644
--- a/base/modules/auxil/psi_d_serial_mod.f90
+++ b/base/modules/auxil/psi_d_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_d_serial_mod
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_d_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_dabgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90
index 1f1bebd7..6f4e8c06 100644
--- a/base/modules/auxil/psi_e_serial_mod.f90
+++ b/base/modules/auxil/psi_e_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_e_serial_mod
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_e_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_eabgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90
index 770d3256..ffa14059 100644
--- a/base/modules/auxil/psi_i2_serial_mod.f90
+++ b/base/modules/auxil/psi_i2_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_i2_serial_mod
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_i2_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_i2abgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90
index 3583cccc..5661fdbf 100644
--- a/base/modules/auxil/psi_m_serial_mod.f90
+++ b/base/modules/auxil/psi_m_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_m_serial_mod
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_m_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_mabgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90
index 3e0c6d91..5cc17d58 100644
--- a/base/modules/auxil/psi_s_serial_mod.f90
+++ b/base/modules/auxil/psi_s_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_s_serial_mod
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_s_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_sabgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90
index a8ea734e..8a3f053d 100644
--- a/base/modules/auxil/psi_z_serial_mod.f90
+++ b/base/modules/auxil/psi_z_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2
end interface psb_geaxpby
- interface psi_upd_xyz
- subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_abgdxyz
+ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_z_serial_mod
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_z_upd_xyz
- end interface psi_upd_xyz
+ end subroutine psi_zabgdxyz
+ end interface psi_abgdxyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90
index 591dec09..7f7f937c 100644
--- a/base/modules/psblas/psb_c_psblas_mod.F90
+++ b/base/modules/psblas/psb_c_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_c_psblas_mod
end subroutine psb_caxpby
end interface
- interface psb_upd_xyz
- subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_abgdxyz
+ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
@@ -154,8 +154,8 @@ module psb_c_psblas_mod
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_c_upd_xyz_vect
- end interface psb_upd_xyz
+ end subroutine psb_cabgdxyz_vect
+ end interface psb_abgdxyz
interface psb_geamax
function psb_camax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90
index b200bc8a..12090956 100644
--- a/base/modules/psblas/psb_d_psblas_mod.F90
+++ b/base/modules/psblas/psb_d_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_d_psblas_mod
end subroutine psb_daxpby
end interface
- interface psb_upd_xyz
- subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_abgdxyz
+ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
@@ -154,8 +154,8 @@ module psb_d_psblas_mod
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_upd_xyz_vect
- end interface psb_upd_xyz
+ end subroutine psb_dabgdxyz_vect
+ end interface psb_abgdxyz
interface psb_geamax
function psb_damax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90
index a60da025..7a7ce783 100644
--- a/base/modules/psblas/psb_s_psblas_mod.F90
+++ b/base/modules/psblas/psb_s_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_s_psblas_mod
end subroutine psb_saxpby
end interface
- interface psb_upd_xyz
- subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_abgdxyz
+ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
@@ -154,8 +154,8 @@ module psb_s_psblas_mod
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_s_upd_xyz_vect
- end interface psb_upd_xyz
+ end subroutine psb_sabgdxyz_vect
+ end interface psb_abgdxyz
interface psb_geamax
function psb_samax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90
index 241df2b9..bcfe9caa 100644
--- a/base/modules/psblas/psb_z_psblas_mod.F90
+++ b/base/modules/psblas/psb_z_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby
end interface
- interface psb_upd_xyz
- subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_abgdxyz
+ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
@@ -154,8 +154,8 @@ module psb_z_psblas_mod
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_z_upd_xyz_vect
- end interface psb_upd_xyz
+ end subroutine psb_zabgdxyz_vect
+ end interface psb_abgdxyz
interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global)
diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90
index 4dac86d6..41bab5ab 100644
--- a/base/modules/serial/psb_c_base_vect_mod.F90
+++ b/base/modules/serial/psb_c_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => c_base_upd_xyz
+ procedure, pass(z) :: abgdxyz => c_base_abgdxyz
procedure, pass(w) :: xyzw => c_base_xyzw
!
@@ -1130,12 +1130,12 @@ contains
end subroutine c_base_axpby_a2
!
- ! UPD_XYZ is invoked via Z, hence the structure below.
+ ! ABGDXYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_upd_xyz
+ !> Function base_abgdxyz
!! \memberof psb_c_base_vect_type
- !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
- call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine c_base_upd_xyz
+ end subroutine c_base_abgdxyz
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90
index 1e9510f2..865f9456 100644
--- a/base/modules/serial/psb_c_vect_mod.F90
+++ b/base/modules/serial/psb_c_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => c_vect_upd_xyz
+ procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v
@@ -774,7 +774,7 @@ contains
end subroutine c_vect_axpby_a2
- subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -785,9 +785,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine c_vect_upd_xyz
+ end subroutine c_vect_abgdxyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90
index f07b5aed..1ad1ffa5 100644
--- a/base/modules/serial/psb_d_base_vect_mod.F90
+++ b/base/modules/serial/psb_d_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => d_base_upd_xyz
+ procedure, pass(z) :: abgdxyz => d_base_abgdxyz
procedure, pass(w) :: xyzw => d_base_xyzw
!
@@ -1137,12 +1137,12 @@ contains
end subroutine d_base_axpby_a2
!
- ! UPD_XYZ is invoked via Z, hence the structure below.
+ ! ABGDXYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_upd_xyz
+ !> Function base_abgdxyz
!! \memberof psb_d_base_vect_type
- !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
- call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine d_base_upd_xyz
+ end subroutine d_base_abgdxyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90
index ae3062dd..55dd8230 100644
--- a/base/modules/serial/psb_d_vect_mod.F90
+++ b/base/modules/serial/psb_d_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => d_vect_upd_xyz
+ procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v
@@ -781,7 +781,7 @@ contains
end subroutine d_vect_axpby_a2
- subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -792,9 +792,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine d_vect_upd_xyz
+ end subroutine d_vect_abgdxyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90
index 596cd634..26b82c31 100644
--- a/base/modules/serial/psb_s_base_vect_mod.F90
+++ b/base/modules/serial/psb_s_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => s_base_upd_xyz
+ procedure, pass(z) :: abgdxyz => s_base_abgdxyz
procedure, pass(w) :: xyzw => s_base_xyzw
!
@@ -1137,12 +1137,12 @@ contains
end subroutine s_base_axpby_a2
!
- ! UPD_XYZ is invoked via Z, hence the structure below.
+ ! ABGDXYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_upd_xyz
+ !> Function base_abgdxyz
!! \memberof psb_s_base_vect_type
- !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
- call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine s_base_upd_xyz
+ end subroutine s_base_abgdxyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90
index cad4659c..a50b2a0a 100644
--- a/base/modules/serial/psb_s_vect_mod.F90
+++ b/base/modules/serial/psb_s_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => s_vect_upd_xyz
+ procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v
@@ -781,7 +781,7 @@ contains
end subroutine s_vect_axpby_a2
- subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -792,9 +792,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine s_vect_upd_xyz
+ end subroutine s_vect_abgdxyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90
index 1bbdfba1..a3afc9c1 100644
--- a/base/modules/serial/psb_z_base_vect_mod.F90
+++ b/base/modules/serial/psb_z_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => z_base_upd_xyz
+ procedure, pass(z) :: abgdxyz => z_base_abgdxyz
procedure, pass(w) :: xyzw => z_base_xyzw
!
@@ -1130,12 +1130,12 @@ contains
end subroutine z_base_axpby_a2
!
- ! UPD_XYZ is invoked via Z, hence the structure below.
+ ! ABGDXYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_upd_xyz
+ !> Function base_abgdxyz
!! \memberof psb_z_base_vect_type
- !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
- call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine z_base_upd_xyz
+ end subroutine z_base_abgdxyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90
index 48f2e947..21e0c546 100644
--- a/base/modules/serial/psb_z_vect_mod.F90
+++ b/base/modules/serial/psb_z_vect_mod.F90
@@ -102,7 +102,7 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: upd_xyz => z_vect_upd_xyz
+ procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v
@@ -774,7 +774,7 @@ contains
end subroutine z_vect_axpby_a2
- subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -785,9 +785,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine z_vect_upd_xyz
+ end subroutine z_vect_abgdxyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90
index 5d80ef00..7c22bb06 100644
--- a/base/psblas/psb_caxpby.f90
+++ b/base/psblas/psb_caxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
end subroutine psb_caddconst_vect
-subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect
+ use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_c_upd_xyz_vect
+end subroutine psb_cabgdxyz_vect
diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90
index 38ebe465..1de77647 100644
--- a/base/psblas/psb_daxpby.f90
+++ b/base/psblas/psb_daxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
end subroutine psb_daddconst_vect
-subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect
+ use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_d_upd_xyz_vect
+end subroutine psb_dabgdxyz_vect
diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90
index 0055fdbe..1b1f24e6 100644
--- a/base/psblas/psb_saxpby.f90
+++ b/base/psblas/psb_saxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
end subroutine psb_saddconst_vect
-subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect
+ use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_s_upd_xyz_vect
+end subroutine psb_sabgdxyz_vect
diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90
index e93488e3..0f37a1f4 100644
--- a/base/psblas/psb_zaxpby.f90
+++ b/base/psblas/psb_zaxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
end subroutine psb_zaddconst_vect
-subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect
+ use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_z_upd_xyz_vect
+end subroutine psb_zabgdxyz_vect
diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90
index e3f1d9a3..e230a1e0 100644
--- a/base/serial/psi_c_serial_impl.F90
+++ b/base/serial/psi_c_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine caxpbyv2
-subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='c_upd_xyz'
+ name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_c_upd_xyz
+end subroutine psi_cabgdxyz
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='c_xyzw'
+ name='cabgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90
index d6a9a31d..bf1b2917 100644
--- a/base/serial/psi_d_serial_impl.F90
+++ b/base/serial/psi_d_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine daxpbyv2
-subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='d_upd_xyz'
+ name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_d_upd_xyz
+end subroutine psi_dabgdxyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='d_xyzw'
+ name='dabgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90
index 37b11a94..911ab4ec 100644
--- a/base/serial/psi_e_serial_impl.F90
+++ b/base/serial/psi_e_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine eaxpbyv2
-subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='e_upd_xyz'
+ name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_e_upd_xyz
+end subroutine psi_eabgdxyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='e_xyzw'
+ name='eabgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90
index c20cd60b..fb42dfcd 100644
--- a/base/serial/psi_i2_serial_impl.F90
+++ b/base/serial/psi_i2_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine i2axpbyv2
-subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='i2_upd_xyz'
+ name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_i2_upd_xyz
+end subroutine psi_i2abgdxyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='i2_xyzw'
+ name='i2abgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90
index 55913a16..346fd897 100644
--- a/base/serial/psi_m_serial_impl.F90
+++ b/base/serial/psi_m_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine maxpbyv2
-subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='m_upd_xyz'
+ name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_m_upd_xyz
+end subroutine psi_mabgdxyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='m_xyzw'
+ name='mabgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90
index c3846c8e..52f86bcd 100644
--- a/base/serial/psi_s_serial_impl.F90
+++ b/base/serial/psi_s_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine saxpbyv2
-subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='s_upd_xyz'
+ name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_s_upd_xyz
+end subroutine psi_sabgdxyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='s_xyzw'
+ name='sabgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90
index 763eae22..7e680273 100644
--- a/base/serial/psi_z_serial_impl.F90
+++ b/base/serial/psi_z_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine zaxpbyv2
-subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='z_upd_xyz'
+ name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_z_upd_xyz
+end subroutine psi_zabgdxyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='z_xyzw'
+ name='zabgdxyz'
info = psb_success_
if (m.lt.0) then
diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90
index 45fafe0a..2c2a4f61 100644
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod
procedure, pass(x) :: dot_a => c_cuda_dot_a
procedure, pass(y) :: axpby_v => c_cuda_axpby_v
procedure, pass(y) :: axpby_a => c_cuda_axpby_a
- procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz
+ procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz
procedure, pass(y) :: mlt_v => c_cuda_mlt_v
procedure, pass(y) :: mlt_a => c_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine c_cuda_axpby_v
- subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx d_cuda_dot_a
procedure, pass(y) :: axpby_v => d_cuda_axpby_v
procedure, pass(y) :: axpby_a => d_cuda_axpby_a
- procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz
+ procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz
procedure, pass(y) :: mlt_v => d_cuda_mlt_v
procedure, pass(y) :: mlt_a => d_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine d_cuda_axpby_v
- subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx s_cuda_dot_a
procedure, pass(y) :: axpby_v => s_cuda_axpby_v
procedure, pass(y) :: axpby_a => s_cuda_axpby_a
- procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz
+ procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz
procedure, pass(y) :: mlt_v => s_cuda_mlt_v
procedure, pass(y) :: mlt_a => s_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine s_cuda_axpby_v
- subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx z_cuda_dot_a
procedure, pass(y) :: axpby_v => z_cuda_axpby_v
procedure, pass(y) :: axpby_a => z_cuda_axpby_a
- procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz
+ procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz
procedure, pass(y) :: mlt_v => z_cuda_mlt_v
procedure, pass(y) :: mlt_a => z_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine z_cuda_axpby_v
- subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx x)
+ type is (psb_c_vect_oacc)
+ select type(yy => y)
+ type is (psb_c_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nzt = a%nzt
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val, 1)
+ do j = 1, nzt
+ yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'ell_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_c_oacc_ell_inner_vect_sv
+end submodule psb_c_oacc_ell_inner_vect_sv_impl
diff --git a/openacc/impl/psb_c_oacc_ell_mold.F90 b/openacc/impl/psb_c_oacc_ell_mold.F90
new file mode 100644
index 00000000..88331d1d
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_mold(a, b, info)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'ell_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_c_oacc_ell_mold
+end submodule psb_c_oacc_ell_mold_impl
diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
new file mode 100644
index 00000000..7e703aa2
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_c_oacc_ell_mv_from_coo
+end submodule psb_c_oacc_ell_mv_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
new file mode 100644
index 00000000..7d1f790d
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_c_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_c_oacc_ell_mv_from_fmt
+end submodule psb_c_oacc_ell_mv_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90
new file mode 100644
index 00000000..9f21c5df
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='c_oacc_ell_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_c_ell_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_ell_reallocate_nz
+end submodule psb_c_oacc_ell_reallocate_nz_impl
diff --git a/openacc/impl/psb_c_oacc_ell_scal.F90 b/openacc/impl/psb_c_oacc_ell_scal.F90
new file mode 100644
index 00000000..b3ea90fb
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_scal.F90
@@ -0,0 +1,58 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_scal(d, a, info, side)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, m, nzt
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(j)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_ell_scal
+end submodule psb_c_oacc_ell_scal_impl
diff --git a/openacc/impl/psb_c_oacc_ell_scals.F90 b/openacc/impl/psb_c_oacc_ell_scals.F90
new file mode 100644
index 00000000..f067f253
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_scals.F90
@@ -0,0 +1,39 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_scals(d, a, info)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, nzt, m
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_ell_scals
+end submodule psb_c_oacc_ell_scals_impl
diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
new file mode 100644
index 00000000..e65d00ba
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
@@ -0,0 +1,66 @@
+submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nzt
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nzt = a%nzt
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n, nzt
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_) :: val(:,:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ complex(psb_spk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = 1, nzt
+ if (ja(i,j) > 0) then
+ tmp = tmp + val(i,j) * x(ja(i,j))
+ end if
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_c_oacc_ell_vect_mv
+end submodule psb_c_oacc_ell_vect_mv_impl
diff --git a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90
new file mode 100644
index 00000000..0840d0d6
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90
@@ -0,0 +1,53 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_c_hll_sparse_mat%allocate(m, n, nz_)
+
+ hksz = a%hksz
+ nhacks = (m + hksz - 1) / hksz
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_ * m))
+ allocate(a%ja(nz_ * m))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ allocate(a%hkoffs(nhacks))
+ end if
+
+ a%val = czero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+ a%hkoffs = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_hll_allocate_mnnz
+end submodule psb_c_oacc_hll_allocate_mnnz_impl
diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
new file mode 100644
index 00000000..4c12cdf8
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
@@ -0,0 +1,85 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ complex(psb_spk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 32 ! Assuming a default hack size of 32
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_per_row * a%get_nrows()))
+ allocate(a%ja(nz_per_row * a%get_nrows()))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
+ end if
+ a%val = czero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
+ a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ !goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(j + (i - 1) * nz_per_row) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ ! Calculate hkoffs for HLL format
+ !$acc parallel loop present(a)
+ do i = 1, size(a%hkoffs)
+ a%hkoffs(i) = (i - 1) * hacksize
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_c_oacc_hll_cp_from_coo
+end submodule psb_c_oacc_hll_cp_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
new file mode 100644
index 00000000..af6cc1d5
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_c_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_c_oacc_hll_cp_from_fmt
+end submodule psb_c_oacc_hll_cp_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_hll_csmm.F90 b/openacc/impl/psb_c_oacc_hll_csmm.F90
new file mode 100644
index 00000000..6b0fc637
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_), intent(in) :: x(:,:)
+ complex(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'c_oacc_hll_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do j = 1, nxy
+ do k = 1, nhacks
+ do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
+ y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_hll_csmm
+end submodule psb_c_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_c_oacc_hll_csmv.F90 b/openacc/impl/psb_c_oacc_hll_csmv.F90
new file mode 100644
index 00000000..f32e37b7
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_csmv.F90
@@ -0,0 +1,84 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_), intent(in) :: x(:)
+ complex(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'c_oacc_hll_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+ ! This loop nest cannot be run with collapse, since
+ ! the inner loop extent varies.
+ !$acc parallel loop present(a, x, y)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_hll_csmv
+end submodule psb_c_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90
new file mode 100644
index 00000000..a8d486b2
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90
@@ -0,0 +1,86 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ complex(psb_spk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'c_oacc_hll_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nhacks, hksz
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_c_vect_oacc)
+ select type(yy => y)
+ type is (psb_c_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i+1) - 1
+ yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'hll_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_c_oacc_hll_inner_vect_sv
+end submodule psb_c_oacc_hll_inner_vect_sv_impl
diff --git a/openacc/impl/psb_c_oacc_hll_mold.F90 b/openacc/impl/psb_c_oacc_hll_mold.F90
new file mode 100644
index 00000000..f480f3ab
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_mold(a, b, info)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'hll_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_c_oacc_hll_mold
+end submodule psb_c_oacc_hll_mold_impl
diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
new file mode 100644
index 00000000..dec52d40
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_c_oacc_hll_mv_from_coo
+end submodule psb_c_oacc_hll_mv_from_coo_impl
diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
new file mode 100644
index 00000000..f2a064cb
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_c_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_c_oacc_hll_mv_from_fmt
+end submodule psb_c_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90
new file mode 100644
index 00000000..52983d4e
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90
@@ -0,0 +1,29 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='c_oacc_hll_reallocate_nz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_c_hll_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_hll_reallocate_nz
+end submodule psb_c_oacc_hll_reallocate_nz_impl
diff --git a/openacc/impl/psb_c_oacc_hll_scal.F90 b/openacc/impl/psb_c_oacc_hll_scal.F90
new file mode 100644
index 00000000..527a0ec1
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_scal.F90
@@ -0,0 +1,62 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_scal(d, a, info, side)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
+ a%val(j) = a%val(j) * d(k)
+ end do
+ end do
+ else if (side == 'R') then
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_hll_scal
+end submodule psb_c_oacc_hll_scal_impl
diff --git a/openacc/impl/psb_c_oacc_hll_scals.F90 b/openacc/impl/psb_c_oacc_hll_scals.F90
new file mode 100644
index 00000000..00f24721
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_scals.F90
@@ -0,0 +1,40 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_scals(d, a, info)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ ! $ a parallel loop collapse(2) present(a)
+ !$acc parallel loop present(a)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_c_oacc_hll_scals
+end submodule psb_c_oacc_hll_scals_impl
diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
new file mode 100644
index 00000000..3b74d11a
--- /dev/null
+++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
@@ -0,0 +1,67 @@
+submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nhacks, hksz
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, nhacks, hksz
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), hkoffs(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, idx, k
+ complex(psb_spk_) :: tmp
+
+ info = 0
+
+ !$acc parallel loop present(val, ja, hkoffs, x, y)
+ do i = 1, nhacks
+ do k = 0, hksz - 1
+ idx = hkoffs(i) + k
+ if (idx <= hkoffs(i + 1) - 1) then
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
+ if (ja(j) > 0) then
+ tmp = tmp + val(j) * x(ja(j))
+ end if
+ end do
+ y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
+ end if
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_c_oacc_hll_vect_mv
+end submodule psb_c_oacc_hll_vect_mv_impl
diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
index 4923e12c..b46c5454 100644
--- a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
+++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
@@ -29,7 +29,7 @@ contains
allocate(a%idiag(m))
end if
- a%val = 0.0_psb_dpk_
+ a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
index 4e8402e7..c13d1edd 100644
--- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
@@ -26,7 +26,7 @@ contains
allocate(a%irn(a%get_nrows()))
allocate(a%idiag(a%get_nrows()))
end if
- a%val = 0.0_psb_dpk_
+ a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0
diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
index 909ee90b..47a6933b 100644
--- a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
+++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
@@ -34,7 +34,7 @@ contains
allocate(a%hkoffs(nhacks))
end if
- a%val = 0.0_psb_dpk_
+ a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
index 4a258c74..18bd768b 100644
--- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
@@ -27,7 +27,7 @@ contains
allocate(a%idiag(a%get_nrows()))
allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
end if
- a%val = 0.0_psb_dpk_
+ a%val = dzero
a%ja = -1
a%irn = 0
a%idiag = 0
diff --git a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
new file mode 100644
index 00000000..38c19b78
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
@@ -0,0 +1,47 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_s_ell_sparse_mat%allocate(m, n, nz_)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(m, nz_))
+ allocate(a%ja(m, nz_))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ end if
+
+ a%val = szero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_ell_allocate_mnnz
+end submodule psb_s_oacc_ell_allocate_mnnz_impl
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
new file mode 100644
index 00000000..9aaaff73
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
@@ -0,0 +1,78 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ real(psb_spk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 1
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(a%get_nrows(), nz_per_row))
+ allocate(a%ja(a%get_nrows(), nz_per_row))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ end if
+ a%val = szero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row, row_counts(row) + 1) = value
+ a%ja(row, row_counts(row) + 1) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ !goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(i, j) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_s_oacc_ell_cp_from_coo
+end submodule psb_s_oacc_ell_cp_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
new file mode 100644
index 00000000..d4c1a233
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_s_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_s_oacc_ell_cp_from_fmt
+end submodule psb_s_oacc_ell_cp_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_ell_csmm.F90 b/openacc/impl/psb_s_oacc_ell_csmm.F90
new file mode 100644
index 00000000..63219384
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:,:)
+ real(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_ell_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = 1, nzt
+ y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_ell_csmm
+end submodule psb_s_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_s_oacc_ell_csmv.F90 b/openacc/impl/psb_s_oacc_ell_csmv.F90
new file mode 100644
index 00000000..d4aaa9d4
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_csmv.F90
@@ -0,0 +1,82 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, nzt
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_ell_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nzt = a%nzt
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ do j = 1, nzt
+ y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_ell_csmv
+end submodule psb_s_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90
new file mode 100644
index 00000000..ba42af12
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90
@@ -0,0 +1,85 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ real(psb_spk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_ell_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nzt
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_s_vect_oacc)
+ select type(yy => y)
+ type is (psb_s_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nzt = a%nzt
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val, 1)
+ do j = 1, nzt
+ yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'ell_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_s_oacc_ell_inner_vect_sv
+end submodule psb_s_oacc_ell_inner_vect_sv_impl
diff --git a/openacc/impl/psb_s_oacc_ell_mold.F90 b/openacc/impl/psb_s_oacc_ell_mold.F90
new file mode 100644
index 00000000..92f18f25
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_mold(a, b, info)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'ell_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_s_oacc_ell_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_s_oacc_ell_mold
+end submodule psb_s_oacc_ell_mold_impl
diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
new file mode 100644
index 00000000..d6bbec13
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_s_ell_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_s_oacc_ell_mv_from_coo
+end submodule psb_s_oacc_ell_mv_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
new file mode 100644
index 00000000..ebb82901
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_s_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_s_oacc_ell_mv_from_fmt
+end submodule psb_s_oacc_ell_mv_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90
new file mode 100644
index 00000000..373c2b67
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='s_oacc_ell_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_s_ell_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_ell_reallocate_nz
+end submodule psb_s_oacc_ell_reallocate_nz_impl
diff --git a/openacc/impl/psb_s_oacc_ell_scal.F90 b/openacc/impl/psb_s_oacc_ell_scal.F90
new file mode 100644
index 00000000..180d8f9a
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_scal.F90
@@ -0,0 +1,58 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_scal(d, a, info, side)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, m, nzt
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(j)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_ell_scal
+end submodule psb_s_oacc_ell_scal_impl
diff --git a/openacc/impl/psb_s_oacc_ell_scals.F90 b/openacc/impl/psb_s_oacc_ell_scals.F90
new file mode 100644
index 00000000..c1c305af
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_scals.F90
@@ -0,0 +1,39 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_scals(d, a, info)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, nzt, m
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_ell_scals
+end submodule psb_s_oacc_ell_scals_impl
diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
new file mode 100644
index 00000000..f48ba041
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
@@ -0,0 +1,66 @@
+submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nzt
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nzt = a%nzt
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n, nzt
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_) :: val(:,:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ real(psb_spk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = 1, nzt
+ if (ja(i,j) > 0) then
+ tmp = tmp + val(i,j) * x(ja(i,j))
+ end if
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_s_oacc_ell_vect_mv
+end submodule psb_s_oacc_ell_vect_mv_impl
diff --git a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90
new file mode 100644
index 00000000..c67ea621
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90
@@ -0,0 +1,53 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_s_hll_sparse_mat%allocate(m, n, nz_)
+
+ hksz = a%hksz
+ nhacks = (m + hksz - 1) / hksz
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_ * m))
+ allocate(a%ja(nz_ * m))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ allocate(a%hkoffs(nhacks))
+ end if
+
+ a%val = szero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+ a%hkoffs = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_hll_allocate_mnnz
+end submodule psb_s_oacc_hll_allocate_mnnz_impl
diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
new file mode 100644
index 00000000..34a0b5d5
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
@@ -0,0 +1,85 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ real(psb_spk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 32 ! Assuming a default hack size of 32
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_per_row * a%get_nrows()))
+ allocate(a%ja(nz_per_row * a%get_nrows()))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
+ end if
+ a%val = szero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
+ a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ !goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(j + (i - 1) * nz_per_row) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ ! Calculate hkoffs for HLL format
+ !$acc parallel loop present(a)
+ do i = 1, size(a%hkoffs)
+ a%hkoffs(i) = (i - 1) * hacksize
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_s_oacc_hll_cp_from_coo
+end submodule psb_s_oacc_hll_cp_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
new file mode 100644
index 00000000..4d023f8b
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_s_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_s_oacc_hll_cp_from_fmt
+end submodule psb_s_oacc_hll_cp_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_hll_csmm.F90 b/openacc/impl/psb_s_oacc_hll_csmm.F90
new file mode 100644
index 00000000..803071eb
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:,:)
+ real(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_hll_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do j = 1, nxy
+ do k = 1, nhacks
+ do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
+ y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_hll_csmm
+end submodule psb_s_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_s_oacc_hll_csmv.F90 b/openacc/impl/psb_s_oacc_hll_csmv.F90
new file mode 100644
index 00000000..b3c0cae8
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_csmv.F90
@@ -0,0 +1,84 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(in) :: x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_hll_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+ ! This loop nest cannot be run with collapse, since
+ ! the inner loop extent varies.
+ !$acc parallel loop present(a, x, y)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_hll_csmv
+end submodule psb_s_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90
new file mode 100644
index 00000000..900b8982
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90
@@ -0,0 +1,86 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ real(psb_spk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 's_oacc_hll_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nhacks, hksz
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_s_vect_oacc)
+ select type(yy => y)
+ type is (psb_s_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i+1) - 1
+ yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_s_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'hll_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_s_oacc_hll_inner_vect_sv
+end submodule psb_s_oacc_hll_inner_vect_sv_impl
diff --git a/openacc/impl/psb_s_oacc_hll_mold.F90 b/openacc/impl/psb_s_oacc_hll_mold.F90
new file mode 100644
index 00000000..1e43b65b
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_mold(a, b, info)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'hll_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_s_oacc_hll_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_s_oacc_hll_mold
+end submodule psb_s_oacc_hll_mold_impl
diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
new file mode 100644
index 00000000..08b553b7
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_s_hll_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_s_oacc_hll_mv_from_coo
+end submodule psb_s_oacc_hll_mv_from_coo_impl
diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
new file mode 100644
index 00000000..d5867289
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_s_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_s_oacc_hll_mv_from_fmt
+end submodule psb_s_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90
new file mode 100644
index 00000000..7768d1ed
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90
@@ -0,0 +1,29 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='s_oacc_hll_reallocate_nz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_s_hll_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_hll_reallocate_nz
+end submodule psb_s_oacc_hll_reallocate_nz_impl
diff --git a/openacc/impl/psb_s_oacc_hll_scal.F90 b/openacc/impl/psb_s_oacc_hll_scal.F90
new file mode 100644
index 00000000..ae36465e
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_scal.F90
@@ -0,0 +1,62 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_scal(d, a, info, side)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
+ a%val(j) = a%val(j) * d(k)
+ end do
+ end do
+ else if (side == 'R') then
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_hll_scal
+end submodule psb_s_oacc_hll_scal_impl
diff --git a/openacc/impl/psb_s_oacc_hll_scals.F90 b/openacc/impl/psb_s_oacc_hll_scals.F90
new file mode 100644
index 00000000..360ea84d
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_scals.F90
@@ -0,0 +1,40 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_scals(d, a, info)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ ! $ a parallel loop collapse(2) present(a)
+ !$acc parallel loop present(a)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_s_oacc_hll_scals
+end submodule psb_s_oacc_hll_scals_impl
diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
new file mode 100644
index 00000000..9d9e9197
--- /dev/null
+++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
@@ -0,0 +1,67 @@
+submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nhacks, hksz
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, nhacks, hksz
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), hkoffs(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, idx, k
+ real(psb_spk_) :: tmp
+
+ info = 0
+
+ !$acc parallel loop present(val, ja, hkoffs, x, y)
+ do i = 1, nhacks
+ do k = 0, hksz - 1
+ idx = hkoffs(i) + k
+ if (idx <= hkoffs(i + 1) - 1) then
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
+ if (ja(j) > 0) then
+ tmp = tmp + val(j) * x(ja(j))
+ end if
+ end do
+ y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
+ end if
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_s_oacc_hll_vect_mv
+end submodule psb_s_oacc_hll_vect_mv_impl
diff --git a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90
new file mode 100644
index 00000000..48a5e202
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90
@@ -0,0 +1,47 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_z_ell_sparse_mat%allocate(m, n, nz_)
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(m, nz_))
+ allocate(a%ja(m, nz_))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ end if
+
+ a%val = zzero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_ell_allocate_mnnz
+end submodule psb_z_oacc_ell_allocate_mnnz_impl
diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
new file mode 100644
index 00000000..e4d3b731
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
@@ -0,0 +1,78 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ complex(psb_dpk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 1
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(a%get_nrows(), nz_per_row))
+ allocate(a%ja(a%get_nrows(), nz_per_row))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ end if
+ a%val = zzero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row, row_counts(row) + 1) = value
+ a%ja(row, row_counts(row) + 1) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ !goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(i, j) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_z_oacc_ell_cp_from_coo
+end submodule psb_z_oacc_ell_cp_from_coo_impl
diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
new file mode 100644
index 00000000..98404ae2
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_z_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_z_oacc_ell_cp_from_fmt
+end submodule psb_z_oacc_ell_cp_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_ell_csmm.F90 b/openacc/impl/psb_z_oacc_ell_csmm.F90
new file mode 100644
index 00000000..406ca8c5
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:,:)
+ complex(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_ell_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, n
+ do k = 1, nzt
+ y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_ell_csmm
+end submodule psb_z_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_z_oacc_ell_csmv.F90 b/openacc/impl/psb_z_oacc_ell_csmv.F90
new file mode 100644
index 00000000..502dd4f1
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_csmv.F90
@@ -0,0 +1,82 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, nzt
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_ell_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nzt = a%nzt
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ do j = 1, nzt
+ y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_ell_csmv
+end submodule psb_z_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90
new file mode 100644
index 00000000..f445a6b4
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90
@@ -0,0 +1,85 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ complex(psb_dpk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_ell_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nzt
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_z_vect_oacc)
+ select type(yy => y)
+ type is (psb_z_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nzt = a%nzt
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, size(a%val, 1)
+ do j = 1, nzt
+ yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_z_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'ell_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_z_oacc_ell_inner_vect_sv
+end submodule psb_z_oacc_ell_inner_vect_sv_impl
diff --git a/openacc/impl/psb_z_oacc_ell_mold.F90 b/openacc/impl/psb_z_oacc_ell_mold.F90
new file mode 100644
index 00000000..fcc222de
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_mold(a, b, info)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'ell_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_z_oacc_ell_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_z_oacc_ell_mold
+end submodule psb_z_oacc_ell_mold_impl
diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
new file mode 100644
index 00000000..26388e5e
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_z_ell_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_z_oacc_ell_mv_from_coo
+end submodule psb_z_oacc_ell_mv_from_coo_impl
diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
new file mode 100644
index 00000000..e0f75828
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_z_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ end select
+
+ end subroutine psb_z_oacc_ell_mv_from_fmt
+end submodule psb_z_oacc_ell_mv_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90
new file mode 100644
index 00000000..8fd3ad77
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90
@@ -0,0 +1,28 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='z_oacc_ell_reallocate_nz'
+ logical, parameter :: debug=.false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_z_ell_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_ell_reallocate_nz
+end submodule psb_z_oacc_ell_reallocate_nz_impl
diff --git a/openacc/impl/psb_z_oacc_ell_scal.F90 b/openacc/impl/psb_z_oacc_ell_scal.F90
new file mode 100644
index 00000000..e3d25ccb
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_scal.F90
@@ -0,0 +1,58 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_scal(d, a, info, side)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, m, nzt
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(i)
+ end do
+ end do
+ else if (side == 'R') then
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
+ end do
+ end do
+ end if
+ else
+ !$acc parallel loop collapse(2) present(a, d)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d(j)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_ell_scal
+end submodule psb_z_oacc_ell_scal_impl
diff --git a/openacc/impl/psb_z_oacc_ell_scals.F90 b/openacc/impl/psb_z_oacc_ell_scals.F90
new file mode 100644
index 00000000..c382627a
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_scals.F90
@@ -0,0 +1,39 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_scals(d, a, info)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='scal'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: i, j, nzt, m
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ m = a%get_nrows()
+ nzt = a%nzt
+
+ !$acc parallel loop collapse(2) present(a)
+ do i = 1, m
+ do j = 1, nzt
+ a%val(i, j) = a%val(i, j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_ell_scals
+end submodule psb_z_oacc_ell_scals_impl
diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
new file mode 100644
index 00000000..ecb61adf
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
@@ -0,0 +1,66 @@
+submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nzt
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nzt = a%nzt
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, n, nzt
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_) :: val(:,:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, ii, isz
+ complex(psb_dpk_) :: tmp
+ integer(psb_ipk_), parameter :: vsz = 256
+
+ info = 0
+
+ !$acc parallel loop vector_length(vsz) private(isz)
+ do ii = 1, m, vsz
+ isz = min(vsz, m - ii + 1)
+ !$acc loop independent private(tmp)
+ do i = ii, ii + isz - 1
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = 1, nzt
+ if (ja(i,j) > 0) then
+ tmp = tmp + val(i,j) * x(ja(i,j))
+ end if
+ end do
+ y(i) = alpha * tmp + beta * y(i)
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_z_oacc_ell_vect_mv
+end submodule psb_z_oacc_ell_vect_mv_impl
diff --git a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90
new file mode 100644
index 00000000..c398d259
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90
@@ -0,0 +1,53 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_allocate_mnnz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_allocate_mnnz(m, n, a, nz)
+ implicit none
+ integer(psb_ipk_), intent(in) :: m, n
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in), optional :: nz
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act, nz_
+ character(len=20) :: name='allocate_mnnz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(nz)) then
+ nz_ = nz
+ else
+ nz_ = 10
+ end if
+
+ call a%psb_z_hll_sparse_mat%allocate(m, n, nz_)
+
+ hksz = a%hksz
+ nhacks = (m + hksz - 1) / hksz
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_ * m))
+ allocate(a%ja(nz_ * m))
+ allocate(a%irn(m))
+ allocate(a%idiag(m))
+ allocate(a%hkoffs(nhacks))
+ end if
+
+ a%val = zzero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+ a%hkoffs = 0
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_hll_allocate_mnnz
+end submodule psb_z_oacc_hll_allocate_mnnz_impl
diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
new file mode 100644
index 00000000..62be2252
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
@@ -0,0 +1,85 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
+ complex(psb_dpk_) :: value
+ integer(psb_ipk_), allocatable :: row_counts(:)
+ integer(psb_ipk_) :: hacksize, nza
+
+ info = psb_success_
+ hacksize = 32 ! Assuming a default hack size of 32
+
+ call a%set_nrows(b%get_nrows())
+ call a%set_ncols(b%get_ncols())
+ nz_per_row = a%nzt
+
+ if (.not.allocated(a%val)) then
+ allocate(a%val(nz_per_row * a%get_nrows()))
+ allocate(a%ja(nz_per_row * a%get_nrows()))
+ allocate(a%irn(a%get_nrows()))
+ allocate(a%idiag(a%get_nrows()))
+ allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
+ end if
+ a%val = zzero
+ a%ja = -1
+ a%irn = 0
+ a%idiag = 0
+
+ allocate(row_counts(a%get_nrows()))
+ row_counts = 0
+
+ nza = b%get_nzeros()
+
+ !$acc parallel loop present(b, a, row_counts)
+ do k = 1, nza
+ row = b%ia(k)
+ col = b%ja(k)
+ value = b%val(k)
+ if (row_counts(row) < nz_per_row) then
+ a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
+ a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
+ row_counts(row) = row_counts(row) + 1
+ else
+ info = psb_err_invalid_mat_state_
+ !goto 9999
+ end if
+ end do
+
+ a%irn = row_counts
+
+ !$acc parallel loop present(a)
+ do i = 1, a%get_nrows()
+ do j = 1, nz_per_row
+ if (a%ja(j + (i - 1) * nz_per_row) == i) then
+ a%idiag(i) = j
+ exit
+ end if
+ end do
+ end do
+
+ ! Calculate hkoffs for HLL format
+ !$acc parallel loop present(a)
+ do i = 1, size(a%hkoffs)
+ a%hkoffs(i) = (i - 1) * hacksize
+ end do
+
+ deallocate(row_counts)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_z_oacc_hll_cp_from_coo
+end submodule psb_z_oacc_hll_cp_from_coo_impl
diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
new file mode 100644
index 00000000..f267e1c6
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_cp_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_z_coo_sparse_mat)
+ call a%cp_from_coo(b, info)
+ class default
+ call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_z_oacc_hll_cp_from_fmt
+end submodule psb_z_oacc_hll_cp_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_hll_csmm.F90 b/openacc/impl/psb_z_oacc_hll_csmm.F90
new file mode 100644
index 00000000..3cfe5b32
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_csmm.F90
@@ -0,0 +1,86 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmm_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:,:)
+ complex(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_hll_csmm'
+ logical, parameter :: debug = .false.
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ nxy = min(size(x,2), size(y,2))
+ nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
+
+ !$acc parallel loop collapse(2) present(a, x, y)
+ do j = 1, nxy
+ do i = 1, m
+ y(i,j) = beta * y(i,j)
+ end do
+ end do
+
+ !$acc parallel loop present(a, x, y)
+ do j = 1, nxy
+ do k = 1, nhacks
+ do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
+ y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
+ end do
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_hll_csmm
+end submodule psb_z_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_z_oacc_hll_csmv.F90 b/openacc/impl/psb_z_oacc_hll_csmv.F90
new file mode 100644
index 00000000..923bc061
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_csmv.F90
@@ -0,0 +1,84 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(in) :: x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ character :: trans_
+ integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
+ logical :: tra
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_hll_csmv'
+ logical, parameter :: debug = .false.
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra) then
+ m = a%get_ncols()
+ n = a%get_nrows()
+ else
+ n = a%get_ncols()
+ m = a%get_nrows()
+ end if
+
+ if (size(x,1) < n) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (size(y,1) < m) then
+ info = 36
+ call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
+ goto 9999
+ end if
+
+ if (tra) then
+ call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
+ else
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+
+ !$acc parallel loop present(a, x, y)
+ do i = 1, m
+ y(i) = beta * y(i)
+ end do
+ ! This loop nest cannot be run with collapse, since
+ ! the inner loop extent varies.
+ !$acc parallel loop present(a, x, y)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
+ end do
+ end do
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_hll_csmv
+end submodule psb_z_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90
new file mode 100644
index 00000000..1d068542
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90
@@ -0,0 +1,86 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_inner_vect_sv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ complex(psb_dpk_), allocatable :: rx(:), ry(:)
+ logical :: tra
+ character :: trans_
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'z_oacc_hll_inner_vect_sv'
+ logical, parameter :: debug = .false.
+ integer(psb_ipk_) :: i, j, nhacks, hksz
+
+ call psb_get_erraction(err_act)
+ info = psb_success_
+
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+
+ if (.not.a%is_asb()) then
+ info = psb_err_invalid_mat_state_
+ call psb_errpush(info, name)
+ goto 9999
+ endif
+
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
+
+ if (tra .or. (beta /= dzero)) then
+ call x%sync()
+ call y%sync()
+ call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
+ call y%set_host()
+ else
+ select type (xx => x)
+ type is (psb_z_vect_oacc)
+ select type(yy => y)
+ type is (psb_z_vect_oacc)
+ if (xx%is_host()) call xx%sync()
+ if (beta /= dzero) then
+ if (yy%is_host()) call yy%sync()
+ end if
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+ !$acc parallel loop present(a, xx, yy)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i+1) - 1
+ yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
+ end do
+ end do
+ call yy%set_dev()
+ class default
+ rx = xx%get_vect()
+ ry = y%get_vect()
+ call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ class default
+ rx = x%get_vect()
+ ry = y%get_vect()
+ call a%psb_z_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
+ call y%bld(ry)
+ end select
+ endif
+
+ if (info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info, name, a_err = 'hll_vect_sv')
+ goto 9999
+ endif
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+ end subroutine psb_z_oacc_hll_inner_vect_sv
+end submodule psb_z_oacc_hll_inner_vect_sv_impl
diff --git a/openacc/impl/psb_z_oacc_hll_mold.F90 b/openacc/impl/psb_z_oacc_hll_mold.F90
new file mode 100644
index 00000000..f614ad89
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_mold.F90
@@ -0,0 +1,34 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mold_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_mold(a, b, info)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'hll_mold'
+ logical, parameter :: debug = .false.
+
+ call psb_get_erraction(err_act)
+
+ info = 0
+ if (allocated(b)) then
+ call b%free()
+ deallocate(b, stat=info)
+ end if
+ if (info == 0) allocate(psb_z_oacc_hll_sparse_mat :: b, stat=info)
+
+ if (info /= psb_success_) then
+ info = psb_err_alloc_dealloc_
+ call psb_errpush(info, name)
+ goto 9999
+ end if
+ return
+
+9999 call psb_error_handler(err_act)
+
+ return
+
+ end subroutine psb_z_oacc_hll_mold
+end submodule psb_z_oacc_hll_mold_impl
diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
new file mode 100644
index 00000000..2ff574d3
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
@@ -0,0 +1,25 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_coo_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_mv_from_coo(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ call a%psb_z_hll_sparse_mat%mv_from_coo(b, info)
+ if (info /= 0) goto 9999
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+
+ return
+
+9999 continue
+ info = psb_err_alloc_dealloc_
+ return
+
+ end subroutine psb_z_oacc_hll_mv_from_coo
+end submodule psb_z_oacc_hll_mv_from_coo_impl
diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
new file mode 100644
index 00000000..5fa00e38
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
@@ -0,0 +1,24 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_mv_from_fmt_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_mv_from_fmt(a, b, info)
+ implicit none
+
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+
+ info = psb_success_
+
+ select type(b)
+ type is (psb_z_coo_sparse_mat)
+ call a%mv_from_coo(b, info)
+ class default
+ call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info)
+ if (info /= 0) return
+
+ !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ end select
+
+ end subroutine psb_z_oacc_hll_mv_from_fmt
+end submodule psb_z_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90
new file mode 100644
index 00000000..5b49efe5
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90
@@ -0,0 +1,29 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_reallocate_nz_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_reallocate_nz(nz, a)
+ implicit none
+ integer(psb_ipk_), intent(in) :: nz
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name='z_oacc_hll_reallocate_nz'
+ logical, parameter :: debug=.false.
+ integer(psb_ipk_) :: hksz, nhacks
+
+ call psb_erractionsave(err_act)
+ info = psb_success_
+
+ call a%psb_z_hll_sparse_mat%reallocate(nz)
+
+ call a%set_dev()
+ if (info /= 0) goto 9999
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_hll_reallocate_nz
+end submodule psb_z_oacc_hll_reallocate_nz_impl
diff --git a/openacc/impl/psb_z_oacc_hll_scal.F90 b/openacc/impl/psb_z_oacc_hll_scal.F90
new file mode 100644
index 00000000..a2f9aee7
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_scal.F90
@@ -0,0 +1,62 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scal_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_scal(d, a, info, side)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, intent(in), optional :: side
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ if (present(side)) then
+ if (side == 'L') then
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
+ a%val(j) = a%val(j) * d(k)
+ end do
+ end do
+ else if (side == 'R') then
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(a%ja(j))
+ end do
+ end do
+ end if
+ else
+ ! $ a parallel loop collapse(2) present(a, d)
+ !$acc parallel loop present(a, d)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
+ end do
+ end do
+ end if
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_hll_scal
+end submodule psb_z_oacc_hll_scal_impl
diff --git a/openacc/impl/psb_z_oacc_hll_scals.F90 b/openacc/impl/psb_z_oacc_hll_scals.F90
new file mode 100644
index 00000000..888115cd
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_scals.F90
@@ -0,0 +1,40 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_scals_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_scals(d, a, info)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: err_act
+ character(len=20) :: name = 'scal'
+ integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
+
+ info = psb_success_
+ call psb_erractionsave(err_act)
+
+ if (a%is_host()) call a%sync()
+
+ hksz = a%hksz
+ nhacks = (a%get_nrows() + hksz - 1) / hksz
+ nzt = a%nzt
+
+ ! $ a parallel loop collapse(2) present(a)
+ !$acc parallel loop present(a)
+ do i = 1, nhacks
+ do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
+ a%val(j) = a%val(j) * d
+ end do
+ end do
+
+ call a%set_dev()
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(err_act)
+ return
+
+ end subroutine psb_z_oacc_hll_scals
+end submodule psb_z_oacc_hll_scals_impl
diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
new file mode 100644
index 00000000..89d970c0
--- /dev/null
+++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
@@ -0,0 +1,67 @@
+submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_vect_mv_impl
+ use psb_base_mod
+contains
+ module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ implicit none
+
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+
+ integer(psb_ipk_) :: m, n, nhacks, hksz
+
+ info = psb_success_
+ m = a%get_nrows()
+ n = a%get_ncols()
+ nhacks = size(a%hkoffs) - 1
+ hksz = a%hksz
+
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ info = psb_err_invalid_mat_state_
+ return
+ end if
+
+ if (a%is_host()) call a%sync()
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+
+ contains
+
+ subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
+ implicit none
+ integer(psb_ipk_) :: m, nhacks, hksz
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_) :: val(:), x(:), y(:)
+ integer(psb_ipk_) :: ja(:), hkoffs(:)
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_) :: i, j, idx, k
+ complex(psb_dpk_) :: tmp
+
+ info = 0
+
+ !$acc parallel loop present(val, ja, hkoffs, x, y)
+ do i = 1, nhacks
+ do k = 0, hksz - 1
+ idx = hkoffs(i) + k
+ if (idx <= hkoffs(i + 1) - 1) then
+ tmp = 0.0_psb_dpk_
+ !$acc loop seq
+ do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
+ if (ja(j) > 0) then
+ tmp = tmp + val(j) * x(ja(j))
+ end if
+ end do
+ y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
+ end if
+ end do
+ end do
+ end subroutine inner_spmv
+
+ end subroutine psb_z_oacc_hll_vect_mv
+end submodule psb_z_oacc_hll_vect_mv_impl
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
new file mode 100644
index 00000000..102d41c5
--- /dev/null
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -0,0 +1,341 @@
+module psb_c_oacc_ell_mat_mod
+ use iso_c_binding
+ use psb_c_mat_mod
+ use psb_c_ell_mat_mod
+ use psb_c_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_c_ell_sparse_mat) :: psb_c_oacc_ell_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => c_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => c_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => c_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => c_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => c_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => c_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => c_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => c_oacc_ell_set_dev
+ procedure, pass(a) :: sync_space => c_oacc_ell_sync_space
+ procedure, pass(a) :: sync => c_oacc_ell_sync
+ procedure, pass(a) :: free => c_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_c_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_c_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_c_oacc_ell_mold
+
+ end type psb_c_oacc_ell_sparse_mat
+
+ interface
+ module subroutine psb_c_oacc_ell_mold(a,b,info)
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_ell_mold
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_cp_from_fmt(a,b,info)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_ell_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_mv_from_coo(a,b,info)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_ell_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_mv_from_fmt(a,b,info)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_ell_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_ell_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_ell_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+ complex(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_ell_csmm
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+ complex(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_ell_csmv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_scals(d, a, info)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_ell_scals
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_scal(d,a,info,side)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_c_oacc_ell_scal
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_reallocate_nz(nz,a)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_c_oacc_ell_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_allocate_mnnz(m,n,a,nz)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_c_oacc_ell_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_ell_cp_from_coo(a,b,info)
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_ell_cp_from_coo
+ end interface
+
+contains
+
+ subroutine c_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+
+ call a%psb_c_ell_sparse_mat%free()
+
+ return
+ end subroutine c_oacc_ell_free
+
+
+ function c_oacc_ell_sizeof(a) result(res)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+
+ end function c_oacc_ell_sizeof
+
+ subroutine c_oacc_ell_sync_space(a)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call c_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+
+ contains
+ subroutine c_oacc_create_dev(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine c_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+ end subroutine c_oacc_ell_sync_space
+
+
+
+ function c_oacc_ell_is_host(a) result(res)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function c_oacc_ell_is_host
+
+ function c_oacc_ell_is_sync(a) result(res)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function c_oacc_ell_is_sync
+
+ function c_oacc_ell_is_dev(a) result(res)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function c_oacc_ell_is_dev
+
+ subroutine c_oacc_ell_set_host(a)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine c_oacc_ell_set_host
+
+ subroutine c_oacc_ell_set_sync(a)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine c_oacc_ell_set_sync
+
+ subroutine c_oacc_ell_set_dev(a)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine c_oacc_ell_set_dev
+
+ function c_oacc_ell_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'ELL_oacc'
+ end function c_oacc_ell_get_fmt
+
+ subroutine c_oacc_ell_sync(a)
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), target, intent(in) :: a
+ class(psb_c_oacc_ell_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call c_oacc_ell_to_host(a%val)
+ call i_oacc_ell_to_host(a%ja)
+ call i_oacc_ell_to_host_scalar(a%irn)
+ call i_oacc_ell_to_host_scalar(a%idiag)
+ else if (a%is_host()) then
+ call c_oacc_ell_to_dev(a%val)
+ call i_oacc_ell_to_dev(a%ja)
+ call i_oacc_ell_to_dev_scalar(a%irn)
+ call i_oacc_ell_to_dev_scalar(a%idiag)
+ end if
+ call tmpa%set_sync()
+ end subroutine c_oacc_ell_sync
+
+ subroutine c_oacc_ell_to_host(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine c_oacc_ell_to_host
+
+ subroutine c_oacc_ell_to_host_scalar(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine c_oacc_ell_to_host_scalar
+
+ subroutine i_oacc_ell_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev
+
+ subroutine i_oacc_ell_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev_scalar
+
+ subroutine i_oacc_ell_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host
+
+ subroutine i_oacc_ell_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host_scalar
+end module psb_c_oacc_ell_mat_mod
diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90
new file mode 100644
index 00000000..faad0a1b
--- /dev/null
+++ b/openacc/psb_c_oacc_hll_mat_mod.F90
@@ -0,0 +1,352 @@
+module psb_c_oacc_hll_mat_mod
+ use iso_c_binding
+ use psb_c_mat_mod
+ use psb_c_hll_mat_mod
+ use psb_c_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_c_hll_sparse_mat) :: psb_c_oacc_hll_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => c_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => c_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => c_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => c_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => c_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => c_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => c_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => c_oacc_hll_set_dev
+ procedure, pass(a) :: sync_space => c_oacc_hll_sync_space
+ procedure, pass(a) :: sync => c_oacc_hll_sync
+ procedure, pass(a) :: free => c_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_c_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_c_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_c_oacc_hll_mold
+
+ end type psb_c_oacc_hll_sparse_mat
+
+ interface
+ module subroutine psb_c_oacc_hll_mold(a,b,info)
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_hll_mold
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_cp_from_fmt(a,b,info)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_hll_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_mv_from_coo(a,b,info)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_hll_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_mv_from_fmt(a,b,info)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_hll_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_hll_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta
+ class(psb_c_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_hll_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+ complex(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_hll_csmm
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_spk_), intent(in) :: alpha, beta, x(:)
+ complex(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_c_oacc_hll_csmv
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_scals(d, a, info)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_hll_scals
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_scal(d,a,info,side)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_c_oacc_hll_scal
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_reallocate_nz(nz,a)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_c_oacc_hll_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_allocate_mnnz(m,n,a,nz)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_c_oacc_hll_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_c_oacc_hll_cp_from_coo(a,b,info)
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_c_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_c_oacc_hll_cp_from_coo
+ end interface
+
+contains
+
+ subroutine c_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ !$acc exit data delete(a%hkoffs)
+ end if
+
+ call a%psb_c_hll_sparse_mat%free()
+
+ return
+ end subroutine c_oacc_hll_free
+
+ function c_oacc_hll_sizeof(a) result(res)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+ res = res + psb_sizeof_ip * size(a%hkoffs)
+ end function c_oacc_hll_sizeof
+
+
+
+ function c_oacc_hll_is_host(a) result(res)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function c_oacc_hll_is_host
+
+ function c_oacc_hll_is_sync(a) result(res)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function c_oacc_hll_is_sync
+
+ function c_oacc_hll_is_dev(a) result(res)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function c_oacc_hll_is_dev
+
+ subroutine c_oacc_hll_set_host(a)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine c_oacc_hll_set_host
+
+ subroutine c_oacc_hll_set_sync(a)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine c_oacc_hll_set_sync
+
+ subroutine c_oacc_hll_set_dev(a)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine c_oacc_hll_set_dev
+
+ function c_oacc_hll_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'HLL_oacc'
+ end function c_oacc_hll_get_fmt
+
+ subroutine c_oacc_hll_sync_space(a)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call c_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ call i_oacc_create_dev_scalar(a%hkoffs)
+ end if
+
+ contains
+ subroutine c_oacc_create_dev(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine c_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+
+ end subroutine c_oacc_hll_sync_space
+
+
+ subroutine c_oacc_hll_sync(a)
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), target, intent(in) :: a
+ class(psb_c_oacc_hll_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call c_oacc_hll_to_host(a%val)
+ call i_oacc_hll_to_host(a%ja)
+ call i_oacc_hll_to_host_scalar(a%irn)
+ call i_oacc_hll_to_host_scalar(a%idiag)
+ call i_oacc_hll_to_host_scalar(a%hkoffs)
+ else if (a%is_host()) then
+ call c_oacc_hll_to_dev(a%val)
+ call i_oacc_hll_to_dev(a%ja)
+ call i_oacc_hll_to_dev_scalar(a%irn)
+ call i_oacc_hll_to_dev_scalar(a%idiag)
+ call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ end if
+ call tmpa%set_sync()
+ end subroutine c_oacc_hll_sync
+
+ subroutine c_oacc_hll_to_host(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine c_oacc_hll_to_host
+
+ subroutine c_oacc_hll_to_dev(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine c_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host
+
+ subroutine i_oacc_hll_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host_scalar
+
+ subroutine i_oacc_hll_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev_scalar
+
+
+end module psb_c_oacc_hll_mat_mod
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 6f9545ea..9225f159 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -42,6 +42,7 @@ module psb_c_oacc_vect_mod
procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf
procedure, pass(x) :: get_size => c_oacc_get_size
+
procedure, pass(x) :: dot_v => c_oacc_vect_dot
procedure, pass(x) :: dot_a => c_oacc_dot_a
procedure, pass(y) :: axpby_v => c_oacc_axpby_v
@@ -70,7 +71,6 @@ module psb_c_oacc_vect_mod
end subroutine c_oacc_mlt_v
end interface
-
interface
subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
@@ -83,7 +83,7 @@ module psb_c_oacc_vect_mod
character(len=1), intent(in), optional :: conjgx, conjgy
end subroutine c_oacc_mlt_v_2
end interface
-
+
contains
subroutine c_oacc_absval1(x)
@@ -432,7 +432,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
if (y%is_host()) call y%sync_space()
!$acc parallel loop
@@ -459,7 +459,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'c_oacc_sctb_x')
return
@@ -475,8 +475,6 @@ contains
call y%set_dev()
end subroutine c_oacc_sctb_x
-
-
subroutine c_oacc_sctb(n, idx, x, beta, y)
use psb_base_mod
implicit none
@@ -498,7 +496,6 @@ contains
call y%set_host()
end subroutine c_oacc_sctb
-
subroutine c_oacc_gthzbuf(i, n, idx, x)
use psb_base_mod
implicit none
@@ -515,7 +512,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'c_oacc_gthzbuf')
return
@@ -542,7 +539,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'c_oacc_gthzv_x')
return
@@ -577,7 +574,7 @@ contains
select type(vval => val)
type is (psb_c_vect_oacc)
if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space(info)
+ if (virl%is_host()) call virl%sync_space()
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
@@ -591,7 +588,7 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space(info)
+ if (virl%is_dev()) call virl%sync_space()
end select
select type(vval => val)
type is (psb_c_vect_oacc)
@@ -607,8 +604,6 @@ contains
end subroutine c_oacc_ins_v
-
-
subroutine c_oacc_ins_a(n, irl, val, dupl, x, info)
use psi_serial_mod
implicit none
@@ -628,8 +623,6 @@ contains
end subroutine c_oacc_ins_a
-
-
subroutine c_oacc_bld_mn(x, n)
use psb_base_mod
implicit none
@@ -668,7 +661,6 @@ contains
end subroutine c_oacc_bld_x
-
subroutine c_oacc_asb_m(n, x, info)
use psb_base_mod
implicit none
@@ -696,8 +688,6 @@ contains
end if
end subroutine c_oacc_asb_m
-
-
subroutine c_oacc_set_scal(x, val, first, last)
class(psb_c_vect_oacc), intent(inout) :: x
complex(psb_spk_), intent(in) :: val
@@ -718,8 +708,6 @@ contains
call x%set_dev()
end subroutine c_oacc_set_scal
-
-
subroutine c_oacc_zero(x)
use psi_serial_mod
implicit none
@@ -743,6 +731,7 @@ contains
end function c_oacc_get_fmt
+
function c_oacc_vect_dot(n, x, y) result(res)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
@@ -776,9 +765,6 @@ contains
end function c_oacc_vect_dot
-
-
-
function c_oacc_dot_a(n, x, y) result(res)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
@@ -910,7 +896,6 @@ contains
end if
end subroutine c_oacc_vect_all
-
subroutine c_oacc_vect_free(x, info)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 8c5946ba..042c0ff3 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -1,343 +1,341 @@
module psb_d_oacc_ell_mat_mod
- use iso_c_binding
- use psb_d_mat_mod
- use psb_d_ell_mat_mod
- use psb_d_oacc_vect_mod
-
- integer(psb_ipk_), parameter, private :: is_host = -1
- integer(psb_ipk_), parameter, private :: is_sync = 0
- integer(psb_ipk_), parameter, private :: is_dev = 1
-
- type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat
- integer(psb_ipk_) :: devstate = is_host
- contains
- procedure, nopass :: get_fmt => d_oacc_ell_get_fmt
- procedure, pass(a) :: sizeof => d_oacc_ell_sizeof
- procedure, pass(a) :: is_host => d_oacc_ell_is_host
- procedure, pass(a) :: is_sync => d_oacc_ell_is_sync
- procedure, pass(a) :: is_dev => d_oacc_ell_is_dev
- procedure, pass(a) :: set_host => d_oacc_ell_set_host
- procedure, pass(a) :: set_sync => d_oacc_ell_set_sync
- procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
- procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
- procedure, pass(a) :: sync => d_oacc_ell_sync
- procedure, pass(a) :: free => d_oacc_ell_free
- procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv
- procedure, pass(a) :: scals => psb_d_oacc_ell_scals
- procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
- procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt
- procedure, pass(a) :: mold => psb_d_oacc_ell_mold
-
- end type psb_d_oacc_ell_sparse_mat
-
- interface
- module subroutine psb_d_oacc_ell_mold(a,b,info)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_ell_mold
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- class(psb_d_base_sparse_mat), intent(in) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_ell_cp_from_fmt
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(inout) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_ell_mv_from_coo
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- class(psb_d_base_sparse_mat), intent(inout) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_ell_mv_from_fmt
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x, y
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_ell_vect_mv
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x,y
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_ell_inner_vect_sv
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_ell_csmm
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_ell_csmv
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_scals(d, a, info)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_ell_scals
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_scal(d,a,info,side)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: side
- end subroutine psb_d_oacc_ell_scal
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_reallocate_nz(nz,a)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- integer(psb_ipk_), intent(in) :: nz
- end subroutine psb_d_oacc_ell_reallocate_nz
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- integer(psb_ipk_), intent(in) :: m,n
- integer(psb_ipk_), intent(in), optional :: nz
- end subroutine psb_d_oacc_ell_allocate_mnnz
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info)
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(in) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_ell_cp_from_coo
- end interface
-
- contains
-
- subroutine d_oacc_ell_free(a)
- use psb_base_mod
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- integer(psb_ipk_) :: info
-
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
-
- call a%psb_d_ell_sparse_mat%free()
-
- return
- end subroutine d_oacc_ell_free
-
-
- function d_oacc_ell_sizeof(a) result(res)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- integer(psb_epk_) :: res
-
- if (a%is_dev()) call a%sync()
-
- res = 8
- res = res + psb_sizeof_dp * size(a%val)
- res = res + psb_sizeof_ip * size(a%ja)
- res = res + psb_sizeof_ip * size(a%irn)
- res = res + psb_sizeof_ip * size(a%idiag)
-
- end function d_oacc_ell_sizeof
-
- subroutine d_oacc_ell_sync_space(a)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
-
- if (allocated(a%val)) then
- call d_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
-
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
- end subroutine d_oacc_ell_sync_space
-
-
-
- function d_oacc_ell_is_host(a) result(res)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_host)
- end function d_oacc_ell_is_host
-
- function d_oacc_ell_is_sync(a) result(res)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_sync)
- end function d_oacc_ell_is_sync
-
- function d_oacc_ell_is_dev(a) result(res)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_dev)
- end function d_oacc_ell_is_dev
-
- subroutine d_oacc_ell_set_host(a)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
-
- a%devstate = is_host
- end subroutine d_oacc_ell_set_host
-
- subroutine d_oacc_ell_set_sync(a)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
-
- a%devstate = is_sync
- end subroutine d_oacc_ell_set_sync
-
- subroutine d_oacc_ell_set_dev(a)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
-
- a%devstate = is_dev
- end subroutine d_oacc_ell_set_dev
-
- function d_oacc_ell_get_fmt() result(res)
- implicit none
- character(len=5) :: res
- res = 'ELL_oacc'
- end function d_oacc_ell_get_fmt
-
- subroutine d_oacc_ell_sync(a)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a
- class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa
- integer(psb_ipk_) :: info
-
- tmpa => a
- if (a%is_dev()) then
- call d_oacc_ell_to_host(a%val)
- call i_oacc_ell_to_host(a%ja)
- call i_oacc_ell_to_host_scalar(a%irn)
- call i_oacc_ell_to_host_scalar(a%idiag)
- else if (a%is_host()) then
- call d_oacc_ell_to_dev(a%val)
- call i_oacc_ell_to_dev(a%ja)
- call i_oacc_ell_to_dev_scalar(a%irn)
- call i_oacc_ell_to_dev_scalar(a%idiag)
- end if
- call tmpa%set_sync()
- end subroutine d_oacc_ell_sync
-
- subroutine d_oacc_ell_to_host(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine d_oacc_ell_to_host
-
- subroutine d_oacc_ell_to_host_scalar(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_ell_to_host_scalar
-
- subroutine i_oacc_ell_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev
-
- subroutine i_oacc_ell_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev_scalar
-
- subroutine i_oacc_ell_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host
-
- subroutine i_oacc_ell_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host_scalar
-
-
-end module psb_d_oacc_ell_mat_mod
\ No newline at end of file
+ use iso_c_binding
+ use psb_d_mat_mod
+ use psb_d_ell_mat_mod
+ use psb_d_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => d_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => d_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => d_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => d_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => d_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
+ procedure, pass(a) :: sync => d_oacc_ell_sync
+ procedure, pass(a) :: free => d_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_d_oacc_ell_mold
+
+ end type psb_d_oacc_ell_sparse_mat
+
+ interface
+ module subroutine psb_d_oacc_ell_mold(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_mold
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_csmm
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_ell_csmv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_scals(d, a, info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_scals
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_scal(d,a,info,side)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_d_oacc_ell_scal
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_reallocate_nz(nz,a)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_d_oacc_ell_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_d_oacc_ell_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info)
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_ell_cp_from_coo
+ end interface
+
+contains
+
+ subroutine d_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+
+ call a%psb_d_ell_sparse_mat%free()
+
+ return
+ end subroutine d_oacc_ell_free
+
+
+ function d_oacc_ell_sizeof(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+
+ end function d_oacc_ell_sizeof
+
+ subroutine d_oacc_ell_sync_space(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call d_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+ end subroutine d_oacc_ell_sync_space
+
+
+
+ function d_oacc_ell_is_host(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function d_oacc_ell_is_host
+
+ function d_oacc_ell_is_sync(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function d_oacc_ell_is_sync
+
+ function d_oacc_ell_is_dev(a) result(res)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function d_oacc_ell_is_dev
+
+ subroutine d_oacc_ell_set_host(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine d_oacc_ell_set_host
+
+ subroutine d_oacc_ell_set_sync(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine d_oacc_ell_set_sync
+
+ subroutine d_oacc_ell_set_dev(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine d_oacc_ell_set_dev
+
+ function d_oacc_ell_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'ELL_oacc'
+ end function d_oacc_ell_get_fmt
+
+ subroutine d_oacc_ell_sync(a)
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), target, intent(in) :: a
+ class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call d_oacc_ell_to_host(a%val)
+ call i_oacc_ell_to_host(a%ja)
+ call i_oacc_ell_to_host_scalar(a%irn)
+ call i_oacc_ell_to_host_scalar(a%idiag)
+ else if (a%is_host()) then
+ call d_oacc_ell_to_dev(a%val)
+ call i_oacc_ell_to_dev(a%ja)
+ call i_oacc_ell_to_dev_scalar(a%irn)
+ call i_oacc_ell_to_dev_scalar(a%idiag)
+ end if
+ call tmpa%set_sync()
+ end subroutine d_oacc_ell_sync
+
+ subroutine d_oacc_ell_to_host(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine d_oacc_ell_to_host
+
+ subroutine d_oacc_ell_to_host_scalar(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_ell_to_host_scalar
+
+ subroutine i_oacc_ell_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev
+
+ subroutine i_oacc_ell_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev_scalar
+
+ subroutine i_oacc_ell_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host
+
+ subroutine i_oacc_ell_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host_scalar
+end module psb_d_oacc_ell_mat_mod
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
index 530af94a..b1c36a65 100644
--- a/openacc/psb_d_oacc_hll_mat_mod.F90
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -1,352 +1,352 @@
module psb_d_oacc_hll_mat_mod
- use iso_c_binding
- use psb_d_mat_mod
- use psb_d_hll_mat_mod
- use psb_d_oacc_vect_mod
-
- integer(psb_ipk_), parameter, private :: is_host = -1
- integer(psb_ipk_), parameter, private :: is_sync = 0
- integer(psb_ipk_), parameter, private :: is_dev = 1
-
- type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat
- integer(psb_ipk_) :: devstate = is_host
- contains
- procedure, nopass :: get_fmt => d_oacc_hll_get_fmt
- procedure, pass(a) :: sizeof => d_oacc_hll_sizeof
- procedure, pass(a) :: is_host => d_oacc_hll_is_host
- procedure, pass(a) :: is_sync => d_oacc_hll_is_sync
- procedure, pass(a) :: is_dev => d_oacc_hll_is_dev
- procedure, pass(a) :: set_host => d_oacc_hll_set_host
- procedure, pass(a) :: set_sync => d_oacc_hll_set_sync
- procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
- procedure, pass(a) :: sync_space => d_oacc_hll_sync_space
- procedure, pass(a) :: sync => d_oacc_hll_sync
- procedure, pass(a) :: free => d_oacc_hll_free
- procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv
- procedure, pass(a) :: scals => psb_d_oacc_hll_scals
- procedure, pass(a) :: scalv => psb_d_oacc_hll_scal
- procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt
- procedure, pass(a) :: mold => psb_d_oacc_hll_mold
-
- end type psb_d_oacc_hll_sparse_mat
-
- interface
- module subroutine psb_d_oacc_hll_mold(a,b,info)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_hll_mold
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_d_base_sparse_mat), intent(in) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_hll_cp_from_fmt
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(inout) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_hll_mv_from_coo
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_d_base_sparse_mat), intent(inout) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_hll_mv_from_fmt
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x, y
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_hll_vect_mv
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- class(psb_d_base_vect_type), intent(inout) :: x,y
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_hll_inner_vect_sv
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_hll_csmm
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_hll_csmv
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_scals(d, a, info)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_hll_scals
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_scal(d,a,info,side)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- real(psb_dpk_), intent(in) :: d(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: side
- end subroutine psb_d_oacc_hll_scal
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_reallocate_nz(nz,a)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- integer(psb_ipk_), intent(in) :: nz
- end subroutine psb_d_oacc_hll_reallocate_nz
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- integer(psb_ipk_), intent(in) :: m,n
- integer(psb_ipk_), intent(in), optional :: nz
- end subroutine psb_d_oacc_hll_allocate_mnnz
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info)
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(in) :: b
- integer(psb_ipk_), intent(out) :: info
- end subroutine psb_d_oacc_hll_cp_from_coo
- end interface
-
- contains
-
- subroutine d_oacc_hll_free(a)
- use psb_base_mod
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- integer(psb_ipk_) :: info
-
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- !$acc exit data delete(a%hkoffs)
- end if
-
- call a%psb_d_hll_sparse_mat%free()
-
- return
- end subroutine d_oacc_hll_free
-
- function d_oacc_hll_sizeof(a) result(res)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- integer(psb_epk_) :: res
-
- if (a%is_dev()) call a%sync()
-
- res = 8
- res = res + psb_sizeof_dp * size(a%val)
- res = res + psb_sizeof_ip * size(a%ja)
- res = res + psb_sizeof_ip * size(a%irn)
- res = res + psb_sizeof_ip * size(a%idiag)
- res = res + psb_sizeof_ip * size(a%hkoffs)
- end function d_oacc_hll_sizeof
-
-
-
- function d_oacc_hll_is_host(a) result(res)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_host)
- end function d_oacc_hll_is_host
-
- function d_oacc_hll_is_sync(a) result(res)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_sync)
- end function d_oacc_hll_is_sync
-
- function d_oacc_hll_is_dev(a) result(res)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- logical :: res
-
- res = (a%devstate == is_dev)
- end function d_oacc_hll_is_dev
-
- subroutine d_oacc_hll_set_host(a)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
-
- a%devstate = is_host
- end subroutine d_oacc_hll_set_host
-
- subroutine d_oacc_hll_set_sync(a)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
-
- a%devstate = is_sync
- end subroutine d_oacc_hll_set_sync
-
- subroutine d_oacc_hll_set_dev(a)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
-
- a%devstate = is_dev
- end subroutine d_oacc_hll_set_dev
-
- function d_oacc_hll_get_fmt() result(res)
- implicit none
- character(len=5) :: res
- res = 'HLL_oacc'
- end function d_oacc_hll_get_fmt
-
- subroutine d_oacc_hll_sync_space(a)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
-
- if (allocated(a%val)) then
- call d_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- call i_oacc_create_dev_scalar(a%hkoffs)
- end if
-
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
-
- end subroutine d_oacc_hll_sync_space
-
-
- subroutine d_oacc_hll_sync(a)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a
- class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa
- integer(psb_ipk_) :: info
-
- tmpa => a
- if (a%is_dev()) then
- call d_oacc_hll_to_host(a%val)
- call i_oacc_hll_to_host(a%ja)
- call i_oacc_hll_to_host_scalar(a%irn)
- call i_oacc_hll_to_host_scalar(a%idiag)
- call i_oacc_hll_to_host_scalar(a%hkoffs)
- else if (a%is_host()) then
- call d_oacc_hll_to_dev(a%val)
- call i_oacc_hll_to_dev(a%ja)
- call i_oacc_hll_to_dev_scalar(a%irn)
- call i_oacc_hll_to_dev_scalar(a%idiag)
- call i_oacc_hll_to_dev_scalar(a%hkoffs)
- end if
- call tmpa%set_sync()
- end subroutine d_oacc_hll_sync
-
- subroutine d_oacc_hll_to_host(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_hll_to_host
-
- subroutine d_oacc_hll_to_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host
-
- subroutine i_oacc_hll_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host_scalar
-
- subroutine i_oacc_hll_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev_scalar
-
-
-end module psb_d_oacc_hll_mat_mod
\ No newline at end of file
+ use iso_c_binding
+ use psb_d_mat_mod
+ use psb_d_hll_mat_mod
+ use psb_d_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => d_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => d_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => d_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => d_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => d_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
+ procedure, pass(a) :: sync_space => d_oacc_hll_sync_space
+ procedure, pass(a) :: sync => d_oacc_hll_sync
+ procedure, pass(a) :: free => d_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_d_oacc_hll_mold
+
+ end type psb_d_oacc_hll_sparse_mat
+
+ interface
+ module subroutine psb_d_oacc_hll_mold(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_mold
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_d_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_csmm
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_d_oacc_hll_csmv
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_scals(d, a, info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_scals
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_scal(d,a,info,side)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_d_oacc_hll_scal
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_reallocate_nz(nz,a)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_d_oacc_hll_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_d_oacc_hll_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info)
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_d_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_d_oacc_hll_cp_from_coo
+ end interface
+
+contains
+
+ subroutine d_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ !$acc exit data delete(a%hkoffs)
+ end if
+
+ call a%psb_d_hll_sparse_mat%free()
+
+ return
+ end subroutine d_oacc_hll_free
+
+ function d_oacc_hll_sizeof(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+ res = res + psb_sizeof_ip * size(a%hkoffs)
+ end function d_oacc_hll_sizeof
+
+
+
+ function d_oacc_hll_is_host(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function d_oacc_hll_is_host
+
+ function d_oacc_hll_is_sync(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function d_oacc_hll_is_sync
+
+ function d_oacc_hll_is_dev(a) result(res)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function d_oacc_hll_is_dev
+
+ subroutine d_oacc_hll_set_host(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine d_oacc_hll_set_host
+
+ subroutine d_oacc_hll_set_sync(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine d_oacc_hll_set_sync
+
+ subroutine d_oacc_hll_set_dev(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine d_oacc_hll_set_dev
+
+ function d_oacc_hll_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'HLL_oacc'
+ end function d_oacc_hll_get_fmt
+
+ subroutine d_oacc_hll_sync_space(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call d_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ call i_oacc_create_dev_scalar(a%hkoffs)
+ end if
+
+ contains
+ subroutine d_oacc_create_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine d_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+
+ end subroutine d_oacc_hll_sync_space
+
+
+ subroutine d_oacc_hll_sync(a)
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a
+ class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call d_oacc_hll_to_host(a%val)
+ call i_oacc_hll_to_host(a%ja)
+ call i_oacc_hll_to_host_scalar(a%irn)
+ call i_oacc_hll_to_host_scalar(a%idiag)
+ call i_oacc_hll_to_host_scalar(a%hkoffs)
+ else if (a%is_host()) then
+ call d_oacc_hll_to_dev(a%val)
+ call i_oacc_hll_to_dev(a%ja)
+ call i_oacc_hll_to_dev_scalar(a%irn)
+ call i_oacc_hll_to_dev_scalar(a%idiag)
+ call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ end if
+ call tmpa%set_sync()
+ end subroutine d_oacc_hll_sync
+
+ subroutine d_oacc_hll_to_host(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine d_oacc_hll_to_host
+
+ subroutine d_oacc_hll_to_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host
+
+ subroutine i_oacc_hll_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host_scalar
+
+ subroutine i_oacc_hll_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev_scalar
+
+
+end module psb_d_oacc_hll_mat_mod
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 7d51766d..0dff0f27 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -42,6 +42,7 @@ module psb_d_oacc_vect_mod
procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
procedure, pass(x) :: get_size => d_oacc_get_size
+
procedure, pass(x) :: dot_v => d_oacc_vect_dot
procedure, pass(x) :: dot_a => d_oacc_dot_a
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
@@ -70,7 +71,6 @@ module psb_d_oacc_vect_mod
end subroutine d_oacc_mlt_v
end interface
-
interface
subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
@@ -83,7 +83,7 @@ module psb_d_oacc_vect_mod
character(len=1), intent(in), optional :: conjgx, conjgy
end subroutine d_oacc_mlt_v_2
end interface
-
+
contains
subroutine d_oacc_absval1(x)
@@ -432,7 +432,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
if (y%is_host()) call y%sync_space()
!$acc parallel loop
@@ -459,7 +459,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'd_oacc_sctb_x')
return
@@ -475,8 +475,6 @@ contains
call y%set_dev()
end subroutine d_oacc_sctb_x
-
-
subroutine d_oacc_sctb(n, idx, x, beta, y)
use psb_base_mod
implicit none
@@ -498,7 +496,6 @@ contains
call y%set_host()
end subroutine d_oacc_sctb
-
subroutine d_oacc_gthzbuf(i, n, idx, x)
use psb_base_mod
implicit none
@@ -515,7 +512,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'd_oacc_gthzbuf')
return
@@ -542,7 +539,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'd_oacc_gthzv_x')
return
@@ -577,7 +574,7 @@ contains
select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space(info)
+ if (virl%is_host()) call virl%sync_space()
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
@@ -591,7 +588,7 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space(info)
+ if (virl%is_dev()) call virl%sync_space()
end select
select type(vval => val)
type is (psb_d_vect_oacc)
@@ -607,8 +604,6 @@ contains
end subroutine d_oacc_ins_v
-
-
subroutine d_oacc_ins_a(n, irl, val, dupl, x, info)
use psi_serial_mod
implicit none
@@ -628,8 +623,6 @@ contains
end subroutine d_oacc_ins_a
-
-
subroutine d_oacc_bld_mn(x, n)
use psb_base_mod
implicit none
@@ -668,7 +661,6 @@ contains
end subroutine d_oacc_bld_x
-
subroutine d_oacc_asb_m(n, x, info)
use psb_base_mod
implicit none
@@ -696,8 +688,6 @@ contains
end if
end subroutine d_oacc_asb_m
-
-
subroutine d_oacc_set_scal(x, val, first, last)
class(psb_d_vect_oacc), intent(inout) :: x
real(psb_dpk_), intent(in) :: val
@@ -718,8 +708,6 @@ contains
call x%set_dev()
end subroutine d_oacc_set_scal
-
-
subroutine d_oacc_zero(x)
use psi_serial_mod
implicit none
@@ -743,6 +731,7 @@ contains
end function d_oacc_get_fmt
+
function d_oacc_vect_dot(n, x, y) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -776,9 +765,6 @@ contains
end function d_oacc_vect_dot
-
-
-
function d_oacc_dot_a(n, x, y) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -910,7 +896,6 @@ contains
end if
end subroutine d_oacc_vect_all
-
subroutine d_oacc_vect_free(x, info)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 70fc325e..72e9ada2 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -1,455 +1,505 @@
module psb_i_oacc_vect_mod
- use iso_c_binding
- use psb_const_mod
- use psb_error_mod
- use psb_i_vect_mod
-
- integer(psb_ipk_), parameter, private :: is_host = -1
- integer(psb_ipk_), parameter, private :: is_sync = 0
- integer(psb_ipk_), parameter, private :: is_dev = 1
-
- type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc
- integer :: state = is_host
- contains
- procedure, pass(x) :: get_nrows => i_oacc_get_nrows
- procedure, nopass :: get_fmt => i_oacc_get_fmt
-
- procedure, pass(x) :: all => i_oacc_all
- procedure, pass(x) :: zero => i_oacc_zero
- procedure, pass(x) :: asb_m => i_oacc_asb_m
- procedure, pass(x) :: sync => i_oacc_sync
- procedure, pass(x) :: sync_space => i_oacc_sync_space
- procedure, pass(x) :: bld_x => i_oacc_bld_x
- procedure, pass(x) :: bld_mn => i_oacc_bld_mn
- procedure, pass(x) :: free => i_oacc_free
- procedure, pass(x) :: ins_a => i_oacc_ins_a
- procedure, pass(x) :: ins_v => i_oacc_ins_v
- procedure, pass(x) :: is_host => i_oacc_is_host
- procedure, pass(x) :: is_dev => i_oacc_is_dev
- procedure, pass(x) :: is_sync => i_oacc_is_sync
- procedure, pass(x) :: set_host => i_oacc_set_host
- procedure, pass(x) :: set_dev => i_oacc_set_dev
- procedure, pass(x) :: set_sync => i_oacc_set_sync
- procedure, pass(x) :: set_scal => i_oacc_set_scal
- procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
- procedure, pass(y) :: sctb => i_oacc_sctb
- procedure, pass(y) :: sctb_x => i_oacc_sctb_x
- procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf
- procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
-
- final :: i_oacc_vect_finalize
- end type psb_i_vect_oacc
-
- public :: psb_i_vect_oacc_
- private :: constructor
- interface psb_i_vect_oacc_
- module procedure constructor
- end interface psb_i_vect_oacc_
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_i_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_i_base_vect_type) :: psb_i_vect_oacc
+ integer :: state = is_host
+
+ contains
+ procedure, pass(x) :: get_nrows => i_oacc_get_nrows
+ procedure, nopass :: get_fmt => i_oacc_get_fmt
+
+ procedure, pass(x) :: all => i_oacc_vect_all
+ procedure, pass(x) :: zero => i_oacc_zero
+ procedure, pass(x) :: asb_m => i_oacc_asb_m
+ procedure, pass(x) :: sync => i_oacc_sync
+ procedure, pass(x) :: sync_space => i_oacc_sync_space
+ procedure, pass(x) :: bld_x => i_oacc_bld_x
+ procedure, pass(x) :: bld_mn => i_oacc_bld_mn
+ procedure, pass(x) :: free => i_oacc_vect_free
+ procedure, pass(x) :: ins_a => i_oacc_ins_a
+ procedure, pass(x) :: ins_v => i_oacc_ins_v
+ procedure, pass(x) :: is_host => i_oacc_is_host
+ procedure, pass(x) :: is_dev => i_oacc_is_dev
+ procedure, pass(x) :: is_sync => i_oacc_is_sync
+ procedure, pass(x) :: set_host => i_oacc_set_host
+ procedure, pass(x) :: set_dev => i_oacc_set_dev
+ procedure, pass(x) :: set_sync => i_oacc_set_sync
+ procedure, pass(x) :: set_scal => i_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf
+ procedure, pass(y) :: sctb => i_oacc_sctb
+ procedure, pass(y) :: sctb_x => i_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => i_oacc_get_size
+
+
+ end type psb_i_vect_oacc
+
contains
- function constructor(x) result(this)
- integer(psb_ipk_) :: x(:)
- type(psb_i_vect_oacc) :: this
- integer(psb_ipk_) :: info
- this%v = x
- call this%asb(size(x), info)
- end function constructor
+ subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_ipk_) :: beta
+ class(psb_i_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
- subroutine i_oacc_gthzv_x(i, n, idx, x, y)
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type) :: idx
- integer(psb_ipk_) :: y(:)
- class(psb_i_vect_oacc) :: x
- integer :: info
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ if (y%is_host()) call y%sync_space()
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
- end subroutine i_oacc_gthzv_x
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
- subroutine i_oacc_gthzbuf(i, n, idx, x)
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type) :: idx
- class(psb_i_vect_oacc) :: x
- integer :: info
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine i_oacc_sctb_buf
- if (.not.allocated(x%combuf)) then
- call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
- return
- end if
+ subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_ipk_) :: beta, x(:)
+ class(psb_i_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
- end subroutine i_oacc_gthzbuf
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ class default
+ call psb_errpush(info, 'i_oacc_sctb_x')
+ return
+ end select
- subroutine i_oacc_sctb(n, idx, x, beta, y)
- implicit none
- integer(psb_ipk_) :: n, idx(:)
- integer(psb_ipk_) :: beta, x(:)
- class(psb_i_vect_oacc) :: y
- integer(psb_ipk_) :: info
- integer :: i
+ if (y%is_host()) call y%sync_space()
- if (n == 0) return
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+ call y%set_dev()
+ end subroutine i_oacc_sctb_x
+
+ subroutine i_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ integer(psb_ipk_) :: beta, x(:)
+ class(psb_i_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine i_oacc_sctb
+
+ subroutine i_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_i_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ class default
+ call psb_errpush(info, 'i_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine i_oacc_gthzbuf
+
+ subroutine i_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ integer(psb_ipk_) :: y(:)
+ class(psb_i_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ class default
+ call psb_errpush(info, 'i_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine i_oacc_gthzv_x
+
+ subroutine i_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_i_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_i_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space()
+ if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
- y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
- end do
- end subroutine i_oacc_sctb
-
- subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type) :: idx
- integer(psb_ipk_) :: beta, x(:)
- class(psb_i_vect_oacc) :: y
- integer :: info
-
- select type(ii => idx)
- class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
- if (y%is_host()) call y%sync_space(info)
-
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + x(i)
- end do
-
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
- end select
- end subroutine i_oacc_sctb_x
-
- subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
- implicit none
- integer(psb_ipk_) :: i, n
- class(psb_i_base_vect_type) :: idx
- integer(psb_ipk_) :: beta
- class(psb_i_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
- if (.not.allocated(y%v)) then
- call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
- return
- end if
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%v(i)
- end do
- end subroutine i_oacc_sctb_buf
-
- subroutine i_oacc_set_host(x)
- class(psb_i_vect_oacc), intent(inout) :: x
- x%state = is_host
- end subroutine i_oacc_set_host
-
- subroutine i_oacc_set_sync(x)
- class(psb_i_vect_oacc), intent(inout) :: x
- x%state = is_sync
- end subroutine i_oacc_set_sync
-
- subroutine i_oacc_set_dev(x)
- class(psb_i_vect_oacc), intent(inout) :: x
- x%state = is_dev
- end subroutine i_oacc_set_dev
-
- subroutine i_oacc_set_scal(x, val, first, last)
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: val
- integer(psb_ipk_), optional :: first, last
-
- integer(psb_ipk_) :: first_, last_
-
- first_ = 1
- last_ = size(x%v)
- if (present(first)) first_ = max(1, first)
- if (present(last)) last_ = min(size(x%v), last)
-
- !$acc parallel loop
- do i = first_, last_
- x%v(i) = val
+ x%v(virl%v(i)) = vval%v(i)
end do
call x%set_dev()
- end subroutine i_oacc_set_scal
-
- function i_oacc_is_host(x) result(res)
- implicit none
- class(psb_i_vect_oacc), intent(in) :: x
- logical :: res
-
- res = (x%state == is_host)
- end function i_oacc_is_host
-
- function i_oacc_is_dev(x) result(res)
- implicit none
- class(psb_i_vect_oacc), intent(in) :: x
- logical :: res
-
- res = (x%state == is_dev)
- end function i_oacc_is_dev
-
- function i_oacc_is_sync(x) result(res)
- implicit none
- class(psb_i_vect_oacc), intent(in) :: x
- logical :: res
-
- res = (x%state == is_sync)
- end function i_oacc_is_sync
-
- subroutine i_oacc_free(x, info)
- use psb_error_mod
- implicit none
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(out) :: info
-
- info = 0
- if (allocated(x%v)) deallocate(x%v, stat=info)
- if (info /= 0) then
- info = psb_err_alloc_dealloc_
- call psb_errpush(info, 'i_oacc_free')
- end if
- call x%set_sync()
- end subroutine i_oacc_free
-
- subroutine i_oacc_ins_a(n, irl, val, dupl, x, info)
- implicit none
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n, dupl
- integer(psb_ipk_), intent(in) :: irl(:)
- integer(psb_ipk_), intent(in) :: val(:)
- integer(psb_ipk_), intent(out) :: info
-
- integer(psb_ipk_) :: i
-
- info = 0
- if (x%is_dev()) call x%sync()
- call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space()
+ end select
+ select type(vval => val)
+ type is (psb_i_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine i_oacc_ins_v
+
+ subroutine i_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ integer(psb_ipk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine i_oacc_ins_a
+
+ subroutine i_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine i_oacc_bld_mn
+
+
+ subroutine i_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: this(:)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'i_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine i_oacc_bld_x
+
+ subroutine i_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_i_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
call x%set_host()
- end subroutine i_oacc_ins_a
-
- subroutine i_oacc_ins_v(n, irl, val, dupl, x, info)
- implicit none
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(in) :: n, dupl
- class(psb_i_base_vect_type), intent(inout) :: irl
- class(psb_i_base_vect_type), intent(inout) :: val
- integer(psb_ipk_), intent(out) :: info
-
- integer(psb_ipk_) :: i, isz
- logical :: done_oacc
-
- info = 0
- if (psb_errstatus_fatal()) return
-
- done_oacc = .false.
- select type(virl => irl)
- class is (psb_i_vect_oacc)
- select type(vval => val)
- class is (psb_i_vect_oacc)
- if (vval%is_host()) call vval%sync()
- if (virl%is_host()) call virl%sync()
- if (x%is_host()) call x%sync()
- ! Add the OpenACC kernel call here if needed
- call x%set_dev()
- done_oacc = .true.
- end select
- end select
-
- if (.not.done_oacc) then
- if (irl%is_dev()) call irl%sync()
- if (val%is_dev()) call val%sync()
- call x%ins(n, irl%v, val%v, dupl, info)
- end if
-
- if (info /= 0) then
- call psb_errpush(info,'i_oacc_ins_v')
- return
- end if
- end subroutine i_oacc_ins_v
-
- subroutine i_oacc_bld_x(x, this)
- use psb_error_mod
- implicit none
- integer(psb_ipk_), intent(in) :: this(:)
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: info
-
- call psb_realloc(size(this), x%v, info)
- if (info /= 0) then
- info = psb_err_alloc_request_
- call psb_errpush(info, 'i_oacc_bld_x', i_err = (/size(this), izero, izero, izero, izero/))
- end if
- x%v(:) = this(:)
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_i_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
call x%set_host()
- call x%sync()
- end subroutine i_oacc_bld_x
-
- subroutine i_oacc_bld_mn(x, n)
- use psb_error_mod
- implicit none
- integer(psb_mpk_), intent(in) :: n
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: info
-
- call x%all(n, info)
- if (info /= 0) then
- call psb_errpush(info, 'i_oacc_bld_mn', i_err = (/n, n, n, n, n/))
- end if
- end subroutine i_oacc_bld_mn
-
- subroutine i_oacc_sync(x)
- use psb_error_mod
- implicit none
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: n, info
-
- info = 0
- if (x%is_host()) then
- n = size(x%v)
- if (.not.allocated(x%v)) then
- write(*, *) 'Incoherent situation : x%v not allocated'
- call psb_realloc(n, x%v, info)
- end if
- if ((n > size(x%v)) .or. (n > x%get_nrows())) then
- write(*, *) 'Incoherent situation : sizes', n, size(x%v), x%get_nrows()
- call psb_realloc(n, x%v, info)
- end if
- !$acc update device(x%v)
- else if (x%is_dev()) then
- n = size(x%v)
- if (.not.allocated(x%v)) then
- write(*, *) 'Incoherent situation : x%v not allocated'
- call psb_realloc(n, x%v, info)
- end if
- !$acc update self(x%v)
- end if
- if (info == 0) call x%set_sync()
- if (info /= 0) then
- info = psb_err_internal_error_
- call psb_errpush(info, 'i_oacc_sync')
- end if
- end subroutine i_oacc_sync
-
- subroutine i_oacc_sync_space(x, info)
- use psb_error_mod
- implicit none
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: nh, nd
-
- info = 0
- if (x%is_dev()) then
- nh = size(x%v)
- nd = nh
- if (nh < nd) then
- call psb_realloc(nd, x%v, info)
- end if
- else
- nh = size(x%v)
- nd = nh
- if (nh < nd) then
- call psb_realloc(nd, x%v, info)
- end if
- end if
- end subroutine i_oacc_sync_space
-
- function i_oacc_get_nrows(x) result(res)
- implicit none
- class(psb_i_vect_oacc), intent(in) :: x
- integer(psb_ipk_) :: res
-
- res = 0
- if (allocated(x%v)) res = size(x%v)
- end function i_oacc_get_nrows
-
- function i_oacc_get_fmt() result(res)
- implicit none
- character(len=5) :: res
- res = 'iOACC'
- end function i_oacc_get_fmt
-
- subroutine i_oacc_all(n, x, info)
- use psb_error_mod
- implicit none
- class(psb_i_vect_oacc), intent(out) :: x
- integer(psb_ipk_), intent(in) :: n
- integer(psb_ipk_), intent(out) :: info
-
- call psb_realloc(n, x%v, info)
- if (info == 0) call x%set_host()
- if (info == 0) call x%sync_space(info)
- if (info /= 0) then
- info = psb_err_alloc_request_
- call psb_errpush(info, 'i_oacc_all', i_err=(/n, n, n, n, n/))
- end if
- end subroutine i_oacc_all
-
- subroutine i_oacc_zero(x)
- use psb_error_mod
- implicit none
- class(psb_i_vect_oacc), intent(inout) :: x
- ! Ensure zeroing on the GPU side
- call x%set_dev()
- x%v = 0
- !$acc update device(x%v)
- end subroutine i_oacc_zero
-
- subroutine i_oacc_asb_m(n, x, info)
- use psb_error_mod
- use psb_realloc_mod
- implicit none
- integer(psb_ipk_), intent(in) :: n
- class(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: nh, nd
-
- info = 0
- if (x%is_dev()) then
- nd = size(x%v)
- if (nd < n) then
- call x%sync()
- call x%psb_i_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space(info)
- call x%set_host()
- end if
- else
- nh = size(x%v)
- if (nh < n) then
- call x%psb_i_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space(info)
- call x%set_host()
- end if
- end if
- end subroutine i_oacc_asb_m
-
- subroutine i_oacc_vect_finalize(x)
- use psi_serial_mod
- use psb_realloc_mod
- implicit none
- type(psb_i_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: info
-
- info = 0
- call x%free(info)
- end subroutine i_oacc_vect_finalize
+ end if
+ end if
+ end subroutine i_oacc_asb_m
-end module psb_i_oacc_vect_mod
+ subroutine i_oacc_set_scal(x, val, first, last)
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine i_oacc_set_scal
+
+ subroutine i_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(izero)
+ end subroutine i_oacc_zero
+
+ function i_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function i_oacc_get_nrows
+
+ function i_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "iOACC"
+
+ end function i_oacc_get_fmt
+
+ ! subroutine i_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_i_vect_oacc), intent(inout) :: x
+ ! integer(psb_ipk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine i_oacc_set_vect
-
-
-
-
-
\ No newline at end of file
+ subroutine i_oacc_to_dev(v)
+ implicit none
+ integer(psb_ipk_) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_to_dev
+
+ subroutine i_oacc_to_host(v)
+ implicit none
+ integer(psb_ipk_) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_to_host
+
+ subroutine i_oacc_sync_space(x)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ if (allocated(x%v)) then
+ call i_oacc_create_dev(x%v)
+ end if
+ contains
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+ end subroutine i_oacc_sync_space
+
+ subroutine i_oacc_sync(x)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ if (x%is_dev()) then
+ call i_oacc_to_host(x%v)
+ end if
+ if (x%is_host()) then
+ call i_oacc_to_dev(x%v)
+ end if
+ call x%set_sync()
+ end subroutine i_oacc_sync
+
+ subroutine i_oacc_set_host(x)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+
+ x%state = is_host
+ end subroutine i_oacc_set_host
+
+ subroutine i_oacc_set_dev(x)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+
+ x%state = is_dev
+ end subroutine i_oacc_set_dev
+
+ subroutine i_oacc_set_sync(x)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+
+ x%state = is_sync
+ end subroutine i_oacc_set_sync
+
+ function i_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function i_oacc_is_dev
+
+ function i_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function i_oacc_is_host
+
+ function i_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function i_oacc_is_sync
+
+ subroutine i_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_i_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'i_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine i_oacc_vect_all
+
+ subroutine i_oacc_vect_free(x, info)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine i_oacc_vect_free
+
+ function i_oacc_get_size(x) result(res)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function i_oacc_get_size
+
+end module psb_i_oacc_vect_mod
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
new file mode 100644
index 00000000..aeba4537
--- /dev/null
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -0,0 +1,507 @@
+module psb_l_oacc_vect_mod
+ use iso_c_binding
+ use psb_const_mod
+ use psb_error_mod
+ use psb_l_vect_mod
+ use psb_i_vect_mod
+ use psb_i_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_l_base_vect_type) :: psb_l_vect_oacc
+ integer :: state = is_host
+
+ contains
+ procedure, pass(x) :: get_nrows => l_oacc_get_nrows
+ procedure, nopass :: get_fmt => l_oacc_get_fmt
+
+ procedure, pass(x) :: all => l_oacc_vect_all
+ procedure, pass(x) :: zero => l_oacc_zero
+ procedure, pass(x) :: asb_m => l_oacc_asb_m
+ procedure, pass(x) :: sync => l_oacc_sync
+ procedure, pass(x) :: sync_space => l_oacc_sync_space
+ procedure, pass(x) :: bld_x => l_oacc_bld_x
+ procedure, pass(x) :: bld_mn => l_oacc_bld_mn
+ procedure, pass(x) :: free => l_oacc_vect_free
+ procedure, pass(x) :: ins_a => l_oacc_ins_a
+ procedure, pass(x) :: ins_v => l_oacc_ins_v
+ procedure, pass(x) :: is_host => l_oacc_is_host
+ procedure, pass(x) :: is_dev => l_oacc_is_dev
+ procedure, pass(x) :: is_sync => l_oacc_is_sync
+ procedure, pass(x) :: set_host => l_oacc_set_host
+ procedure, pass(x) :: set_dev => l_oacc_set_dev
+ procedure, pass(x) :: set_sync => l_oacc_set_sync
+ procedure, pass(x) :: set_scal => l_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf
+ procedure, pass(y) :: sctb => l_oacc_sctb
+ procedure, pass(y) :: sctb_x => l_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => l_oacc_get_size
+
+
+ end type psb_l_vect_oacc
+
+
+contains
+
+
+ subroutine l_oacc_sctb_buf(i, n, idx, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_lpk_) :: beta
+ class(psb_l_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+
+ if (.not.allocated(y%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ end do
+
+ class default
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ end do
+ end select
+ end subroutine l_oacc_sctb_buf
+
+ subroutine l_oacc_sctb_x(i, n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_):: i, n
+ class(psb_i_base_vect_type) :: idx
+ integer(psb_lpk_) :: beta, x(:)
+ class(psb_l_vect_oacc) :: y
+ integer(psb_ipk_) :: info, ni
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ class default
+ call psb_errpush(info, 'l_oacc_sctb_x')
+ return
+ end select
+
+ if (y%is_host()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
+ end do
+
+ call y%set_dev()
+ end subroutine l_oacc_sctb_x
+
+ subroutine l_oacc_sctb(n, idx, x, beta, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: idx(:)
+ integer(psb_lpk_) :: beta, x(:)
+ class(psb_l_vect_oacc) :: y
+ integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: i
+
+ if (n == 0) return
+ if (y%is_dev()) call y%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
+ end do
+
+ call y%set_host()
+ end subroutine l_oacc_sctb
+
+ subroutine l_oacc_gthzbuf(i, n, idx, x)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type) :: idx
+ class(psb_l_vect_oacc) :: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+ if (.not.allocated(x%combuf)) then
+ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
+ return
+ end if
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ class default
+ call psb_errpush(info, 'l_oacc_gthzbuf')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ x%combuf(i) = x%v(idx%v(i))
+ end do
+ end subroutine l_oacc_gthzbuf
+
+ subroutine l_oacc_gthzv_x(i, n, idx, x, y)
+ use psb_base_mod
+ implicit none
+ integer(psb_ipk_) :: i, n
+ class(psb_i_base_vect_type):: idx
+ integer(psb_lpk_) :: y(:)
+ class(psb_l_vect_oacc):: x
+ integer(psb_ipk_) :: info
+
+ info = 0
+
+ select type(ii => idx)
+ class is (psb_i_vect_oacc)
+ if (ii%is_host()) call ii%sync_space()
+ class default
+ call psb_errpush(info, 'l_oacc_gthzv_x')
+ return
+ end select
+
+ if (x%is_host()) call x%sync_space()
+
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = x%v(idx%v(i))
+ end do
+ end subroutine l_oacc_gthzv_x
+
+ subroutine l_oacc_ins_v(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ class(psb_i_base_vect_type), intent(inout) :: irl
+ class(psb_l_base_vect_type), intent(inout) :: val
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i, isz
+ logical :: done_oacc
+
+ info = 0
+ if (psb_errstatus_fatal()) return
+
+ done_oacc = .false.
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ select type(vval => val)
+ type is (psb_l_vect_oacc)
+ if (vval%is_host()) call vval%sync_space()
+ if (virl%is_host()) call virl%sync_space()
+ if (x%is_host()) call x%sync_space()
+ !$acc parallel loop
+ do i = 1, n
+ x%v(virl%v(i)) = vval%v(i)
+ end do
+ call x%set_dev()
+ done_oacc = .true.
+ end select
+ end select
+
+ if (.not.done_oacc) then
+ select type(virl => irl)
+ type is (psb_i_vect_oacc)
+ if (virl%is_dev()) call virl%sync_space()
+ end select
+ select type(vval => val)
+ type is (psb_l_vect_oacc)
+ if (vval%is_dev()) call vval%sync_space()
+ end select
+ call x%ins(n, irl%v, val%v, dupl, info)
+ end if
+
+ if (info /= 0) then
+ call psb_errpush(info, 'oacc_vect_ins')
+ return
+ end if
+
+ end subroutine l_oacc_ins_v
+
+ subroutine l_oacc_ins_a(n, irl, val, dupl, x, info)
+ use psi_serial_mod
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n, dupl
+ integer(psb_ipk_), intent(in) :: irl(:)
+ integer(psb_lpk_), intent(in) :: val(:)
+ integer(psb_ipk_), intent(out) :: info
+
+ integer(psb_ipk_) :: i
+
+ info = 0
+ if (x%is_dev()) call x%sync_space()
+ call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine l_oacc_ins_a
+
+ subroutine l_oacc_bld_mn(x, n)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call x%all(n, info)
+ if (info /= 0) then
+ call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/))
+ end if
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine l_oacc_bld_mn
+
+
+ subroutine l_oacc_bld_x(x, this)
+ use psb_base_mod
+ implicit none
+ integer(psb_lpk_), intent(in) :: this(:)
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+
+ call psb_realloc(size(this), x%v, info)
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'l_oacc_bld_x', &
+ i_err=(/size(this), izero, izero, izero, izero/))
+ return
+ end if
+
+ x%v(:) = this(:)
+ call x%set_host()
+ !$acc update device(x%v)
+
+ end subroutine l_oacc_bld_x
+
+ subroutine l_oacc_asb_m(n, x, info)
+ use psb_base_mod
+ implicit none
+ integer(psb_mpk_), intent(in) :: n
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_mpk_) :: nd
+
+ info = psb_success_
+
+ if (x%is_dev()) then
+ nd = size(x%v)
+ if (nd < n) then
+ call x%sync()
+ call x%psb_l_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ else
+ if (size(x%v) < n) then
+ call x%psb_l_base_vect_type%asb(n, info)
+ if (info == psb_success_) call x%sync_space()
+ call x%set_host()
+ end if
+ end if
+ end subroutine l_oacc_asb_m
+
+ subroutine l_oacc_set_scal(x, val, first, last)
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_lpk_), intent(in) :: val
+ integer(psb_ipk_), optional :: first, last
+
+ integer(psb_ipk_) :: first_, last_
+ first_ = 1
+ last_ = x%get_nrows()
+ if (present(first)) first_ = max(1, first)
+ if (present(last)) last_ = min(last, last_)
+
+ !$acc parallel loop
+ do i = first_, last_
+ x%v(i) = val
+ end do
+ !$acc end parallel loop
+
+ call x%set_dev()
+ end subroutine l_oacc_set_scal
+
+ subroutine l_oacc_zero(x)
+ use psi_serial_mod
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ call x%set_dev()
+ call x%set_scal(lzero)
+ end subroutine l_oacc_zero
+
+ function l_oacc_get_nrows(x) result(res)
+ implicit none
+ class(psb_l_vect_oacc), intent(in) :: x
+ integer(psb_ipk_) :: res
+
+ if (allocated(x%v)) res = size(x%v)
+ end function l_oacc_get_nrows
+
+ function l_oacc_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = "lOACC"
+
+ end function l_oacc_get_fmt
+
+ ! subroutine l_oacc_set_vect(x,y)
+ ! implicit none
+ ! class(psb_l_vect_oacc), intent(inout) :: x
+ ! integer(psb_lpk_), intent(in) :: y(:)
+ ! integer(psb_ipk_) :: info
+
+ ! if (size(x%v) /= size(y)) then
+ ! call x%free(info)
+ ! call x%all(size(y),info)
+ ! end if
+ ! x%v(:) = y(:)
+ ! call x%set_host()
+ ! end subroutine l_oacc_set_vect
+
+ subroutine l_oacc_to_dev(v)
+ implicit none
+ integer(psb_lpk_) :: v(:)
+ !$acc update device(v)
+ end subroutine l_oacc_to_dev
+
+ subroutine l_oacc_to_host(v)
+ implicit none
+ integer(psb_lpk_) :: v(:)
+ !$acc update self(v)
+ end subroutine l_oacc_to_host
+
+ subroutine l_oacc_sync_space(x)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ if (allocated(x%v)) then
+ call l_oacc_create_dev(x%v)
+ end if
+ contains
+ subroutine l_oacc_create_dev(v)
+ implicit none
+ integer(psb_lpk_) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine l_oacc_create_dev
+ end subroutine l_oacc_sync_space
+
+ subroutine l_oacc_sync(x)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ if (x%is_dev()) then
+ call l_oacc_to_host(x%v)
+ end if
+ if (x%is_host()) then
+ call l_oacc_to_dev(x%v)
+ end if
+ call x%set_sync()
+ end subroutine l_oacc_sync
+
+ subroutine l_oacc_set_host(x)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+
+ x%state = is_host
+ end subroutine l_oacc_set_host
+
+ subroutine l_oacc_set_dev(x)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+
+ x%state = is_dev
+ end subroutine l_oacc_set_dev
+
+ subroutine l_oacc_set_sync(x)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+
+ x%state = is_sync
+ end subroutine l_oacc_set_sync
+
+ function l_oacc_is_dev(x) result(res)
+ implicit none
+ class(psb_l_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_dev)
+ end function l_oacc_is_dev
+
+ function l_oacc_is_host(x) result(res)
+ implicit none
+ class(psb_l_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_host)
+ end function l_oacc_is_host
+
+ function l_oacc_is_sync(x) result(res)
+ implicit none
+ class(psb_l_vect_oacc), intent(in) :: x
+ logical :: res
+
+ res = (x%state == is_sync)
+ end function l_oacc_is_sync
+
+ subroutine l_oacc_vect_all(n, x, info)
+ use psi_serial_mod
+ use psb_realloc_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ class(psb_l_vect_oacc), intent(out) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ call psb_realloc(n, x%v, info)
+ if (info == 0) then
+ call x%set_host()
+ !$acc enter data create(x%v)
+ call x%sync_space()
+ end if
+ if (info /= 0) then
+ info = psb_err_alloc_request_
+ call psb_errpush(info, 'l_oacc_all', &
+ i_err=(/n, n, n, n, n/))
+ end if
+ end subroutine l_oacc_vect_all
+
+ subroutine l_oacc_vect_free(x, info)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+ info = 0
+ if (allocated(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine l_oacc_vect_free
+
+ function l_oacc_get_size(x) result(res)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: res
+
+ if (x%is_dev()) call x%sync()
+ res = size(x%v)
+ end function l_oacc_get_size
+
+end module psb_l_oacc_vect_mod
diff --git a/openacc/psb_oacc_mod.F90 b/openacc/psb_oacc_mod.F90
index 2d8e8b40..7d3f9406 100644
--- a/openacc/psb_oacc_mod.F90
+++ b/openacc/psb_oacc_mod.F90
@@ -4,6 +4,7 @@ module psb_oacc_mod
use psb_oacc_env_mod
use psb_i_oacc_vect_mod
+ use psb_l_oacc_vect_mod
use psb_s_oacc_vect_mod
use psb_d_oacc_vect_mod
use psb_c_oacc_vect_mod
@@ -13,5 +14,13 @@ module psb_oacc_mod
use psb_d_oacc_csr_mat_mod
use psb_c_oacc_csr_mat_mod
use psb_z_oacc_csr_mat_mod
+ use psb_s_oacc_ell_mat_mod
+ use psb_d_oacc_ell_mat_mod
+ use psb_c_oacc_ell_mat_mod
+ use psb_z_oacc_ell_mat_mod
+ use psb_s_oacc_hll_mat_mod
+ use psb_d_oacc_hll_mat_mod
+ use psb_c_oacc_hll_mat_mod
+ use psb_z_oacc_hll_mat_mod
end module psb_oacc_mod
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
new file mode 100644
index 00000000..541fdf9a
--- /dev/null
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -0,0 +1,341 @@
+module psb_s_oacc_ell_mat_mod
+ use iso_c_binding
+ use psb_s_mat_mod
+ use psb_s_ell_mat_mod
+ use psb_s_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_s_ell_sparse_mat) :: psb_s_oacc_ell_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => s_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => s_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => s_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => s_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => s_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => s_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => s_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => s_oacc_ell_set_dev
+ procedure, pass(a) :: sync_space => s_oacc_ell_sync_space
+ procedure, pass(a) :: sync => s_oacc_ell_sync
+ procedure, pass(a) :: free => s_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_s_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_s_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_s_oacc_ell_mold
+
+ end type psb_s_oacc_ell_sparse_mat
+
+ interface
+ module subroutine psb_s_oacc_ell_mold(a,b,info)
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_ell_mold
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_cp_from_fmt(a,b,info)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_ell_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_mv_from_coo(a,b,info)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_ell_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_mv_from_fmt(a,b,info)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_ell_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_ell_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_ell_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_ell_csmm
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta, x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_ell_csmv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_scals(d, a, info)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_ell_scals
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_scal(d,a,info,side)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_s_oacc_ell_scal
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_reallocate_nz(nz,a)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_s_oacc_ell_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_allocate_mnnz(m,n,a,nz)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_s_oacc_ell_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_ell_cp_from_coo(a,b,info)
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_ell_cp_from_coo
+ end interface
+
+contains
+
+ subroutine s_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+
+ call a%psb_s_ell_sparse_mat%free()
+
+ return
+ end subroutine s_oacc_ell_free
+
+
+ function s_oacc_ell_sizeof(a) result(res)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+
+ end function s_oacc_ell_sizeof
+
+ subroutine s_oacc_ell_sync_space(a)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call s_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+
+ contains
+ subroutine s_oacc_create_dev(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine s_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+ end subroutine s_oacc_ell_sync_space
+
+
+
+ function s_oacc_ell_is_host(a) result(res)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function s_oacc_ell_is_host
+
+ function s_oacc_ell_is_sync(a) result(res)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function s_oacc_ell_is_sync
+
+ function s_oacc_ell_is_dev(a) result(res)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function s_oacc_ell_is_dev
+
+ subroutine s_oacc_ell_set_host(a)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine s_oacc_ell_set_host
+
+ subroutine s_oacc_ell_set_sync(a)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine s_oacc_ell_set_sync
+
+ subroutine s_oacc_ell_set_dev(a)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine s_oacc_ell_set_dev
+
+ function s_oacc_ell_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'ELL_oacc'
+ end function s_oacc_ell_get_fmt
+
+ subroutine s_oacc_ell_sync(a)
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), target, intent(in) :: a
+ class(psb_s_oacc_ell_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call s_oacc_ell_to_host(a%val)
+ call i_oacc_ell_to_host(a%ja)
+ call i_oacc_ell_to_host_scalar(a%irn)
+ call i_oacc_ell_to_host_scalar(a%idiag)
+ else if (a%is_host()) then
+ call s_oacc_ell_to_dev(a%val)
+ call i_oacc_ell_to_dev(a%ja)
+ call i_oacc_ell_to_dev_scalar(a%irn)
+ call i_oacc_ell_to_dev_scalar(a%idiag)
+ end if
+ call tmpa%set_sync()
+ end subroutine s_oacc_ell_sync
+
+ subroutine s_oacc_ell_to_host(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine s_oacc_ell_to_host
+
+ subroutine s_oacc_ell_to_host_scalar(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine s_oacc_ell_to_host_scalar
+
+ subroutine i_oacc_ell_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev
+
+ subroutine i_oacc_ell_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev_scalar
+
+ subroutine i_oacc_ell_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host
+
+ subroutine i_oacc_ell_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host_scalar
+end module psb_s_oacc_ell_mat_mod
diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90
new file mode 100644
index 00000000..bf8949a1
--- /dev/null
+++ b/openacc/psb_s_oacc_hll_mat_mod.F90
@@ -0,0 +1,352 @@
+module psb_s_oacc_hll_mat_mod
+ use iso_c_binding
+ use psb_s_mat_mod
+ use psb_s_hll_mat_mod
+ use psb_s_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_s_hll_sparse_mat) :: psb_s_oacc_hll_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => s_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => s_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => s_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => s_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => s_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => s_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => s_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => s_oacc_hll_set_dev
+ procedure, pass(a) :: sync_space => s_oacc_hll_sync_space
+ procedure, pass(a) :: sync => s_oacc_hll_sync
+ procedure, pass(a) :: free => s_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_s_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_s_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_s_oacc_hll_mold
+
+ end type psb_s_oacc_hll_sparse_mat
+
+ interface
+ module subroutine psb_s_oacc_hll_mold(a,b,info)
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_hll_mold
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_cp_from_fmt(a,b,info)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_hll_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_mv_from_coo(a,b,info)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_hll_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_mv_from_fmt(a,b,info)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_hll_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_hll_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta
+ class(psb_s_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_hll_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
+ real(psb_spk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_hll_csmm
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ real(psb_spk_), intent(in) :: alpha, beta, x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_s_oacc_hll_csmv
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_scals(d, a, info)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_hll_scals
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_scal(d,a,info,side)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ real(psb_spk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_s_oacc_hll_scal
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_reallocate_nz(nz,a)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_s_oacc_hll_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_allocate_mnnz(m,n,a,nz)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_s_oacc_hll_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_s_oacc_hll_cp_from_coo(a,b,info)
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_s_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_s_oacc_hll_cp_from_coo
+ end interface
+
+contains
+
+ subroutine s_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ !$acc exit data delete(a%hkoffs)
+ end if
+
+ call a%psb_s_hll_sparse_mat%free()
+
+ return
+ end subroutine s_oacc_hll_free
+
+ function s_oacc_hll_sizeof(a) result(res)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+ res = res + psb_sizeof_ip * size(a%hkoffs)
+ end function s_oacc_hll_sizeof
+
+
+
+ function s_oacc_hll_is_host(a) result(res)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function s_oacc_hll_is_host
+
+ function s_oacc_hll_is_sync(a) result(res)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function s_oacc_hll_is_sync
+
+ function s_oacc_hll_is_dev(a) result(res)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function s_oacc_hll_is_dev
+
+ subroutine s_oacc_hll_set_host(a)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine s_oacc_hll_set_host
+
+ subroutine s_oacc_hll_set_sync(a)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine s_oacc_hll_set_sync
+
+ subroutine s_oacc_hll_set_dev(a)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine s_oacc_hll_set_dev
+
+ function s_oacc_hll_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'HLL_oacc'
+ end function s_oacc_hll_get_fmt
+
+ subroutine s_oacc_hll_sync_space(a)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call s_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ call i_oacc_create_dev_scalar(a%hkoffs)
+ end if
+
+ contains
+ subroutine s_oacc_create_dev(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine s_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+
+ end subroutine s_oacc_hll_sync_space
+
+
+ subroutine s_oacc_hll_sync(a)
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), target, intent(in) :: a
+ class(psb_s_oacc_hll_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call s_oacc_hll_to_host(a%val)
+ call i_oacc_hll_to_host(a%ja)
+ call i_oacc_hll_to_host_scalar(a%irn)
+ call i_oacc_hll_to_host_scalar(a%idiag)
+ call i_oacc_hll_to_host_scalar(a%hkoffs)
+ else if (a%is_host()) then
+ call s_oacc_hll_to_dev(a%val)
+ call i_oacc_hll_to_dev(a%ja)
+ call i_oacc_hll_to_dev_scalar(a%irn)
+ call i_oacc_hll_to_dev_scalar(a%idiag)
+ call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ end if
+ call tmpa%set_sync()
+ end subroutine s_oacc_hll_sync
+
+ subroutine s_oacc_hll_to_host(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine s_oacc_hll_to_host
+
+ subroutine s_oacc_hll_to_dev(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine s_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host
+
+ subroutine i_oacc_hll_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host_scalar
+
+ subroutine i_oacc_hll_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev_scalar
+
+
+end module psb_s_oacc_hll_mat_mod
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 36ae7da8..5c34827d 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -42,6 +42,7 @@ module psb_s_oacc_vect_mod
procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf
procedure, pass(x) :: get_size => s_oacc_get_size
+
procedure, pass(x) :: dot_v => s_oacc_vect_dot
procedure, pass(x) :: dot_a => s_oacc_dot_a
procedure, pass(y) :: axpby_v => s_oacc_axpby_v
@@ -70,7 +71,6 @@ module psb_s_oacc_vect_mod
end subroutine s_oacc_mlt_v
end interface
-
interface
subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
@@ -83,7 +83,7 @@ module psb_s_oacc_vect_mod
character(len=1), intent(in), optional :: conjgx, conjgy
end subroutine s_oacc_mlt_v_2
end interface
-
+
contains
subroutine s_oacc_absval1(x)
@@ -432,7 +432,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
if (y%is_host()) call y%sync_space()
!$acc parallel loop
@@ -459,7 +459,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 's_oacc_sctb_x')
return
@@ -475,8 +475,6 @@ contains
call y%set_dev()
end subroutine s_oacc_sctb_x
-
-
subroutine s_oacc_sctb(n, idx, x, beta, y)
use psb_base_mod
implicit none
@@ -498,7 +496,6 @@ contains
call y%set_host()
end subroutine s_oacc_sctb
-
subroutine s_oacc_gthzbuf(i, n, idx, x)
use psb_base_mod
implicit none
@@ -515,7 +512,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 's_oacc_gthzbuf')
return
@@ -542,7 +539,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 's_oacc_gthzv_x')
return
@@ -577,7 +574,7 @@ contains
select type(vval => val)
type is (psb_s_vect_oacc)
if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space(info)
+ if (virl%is_host()) call virl%sync_space()
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
@@ -591,7 +588,7 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space(info)
+ if (virl%is_dev()) call virl%sync_space()
end select
select type(vval => val)
type is (psb_s_vect_oacc)
@@ -607,8 +604,6 @@ contains
end subroutine s_oacc_ins_v
-
-
subroutine s_oacc_ins_a(n, irl, val, dupl, x, info)
use psi_serial_mod
implicit none
@@ -628,8 +623,6 @@ contains
end subroutine s_oacc_ins_a
-
-
subroutine s_oacc_bld_mn(x, n)
use psb_base_mod
implicit none
@@ -668,7 +661,6 @@ contains
end subroutine s_oacc_bld_x
-
subroutine s_oacc_asb_m(n, x, info)
use psb_base_mod
implicit none
@@ -696,8 +688,6 @@ contains
end if
end subroutine s_oacc_asb_m
-
-
subroutine s_oacc_set_scal(x, val, first, last)
class(psb_s_vect_oacc), intent(inout) :: x
real(psb_spk_), intent(in) :: val
@@ -718,8 +708,6 @@ contains
call x%set_dev()
end subroutine s_oacc_set_scal
-
-
subroutine s_oacc_zero(x)
use psi_serial_mod
implicit none
@@ -743,6 +731,7 @@ contains
end function s_oacc_get_fmt
+
function s_oacc_vect_dot(n, x, y) result(res)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
@@ -776,9 +765,6 @@ contains
end function s_oacc_vect_dot
-
-
-
function s_oacc_dot_a(n, x, y) result(res)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
@@ -910,7 +896,6 @@ contains
end if
end subroutine s_oacc_vect_all
-
subroutine s_oacc_vect_free(x, info)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
new file mode 100644
index 00000000..8bf8c9fa
--- /dev/null
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -0,0 +1,341 @@
+module psb_z_oacc_ell_mat_mod
+ use iso_c_binding
+ use psb_z_mat_mod
+ use psb_z_ell_mat_mod
+ use psb_z_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_z_ell_sparse_mat) :: psb_z_oacc_ell_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => z_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => z_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => z_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => z_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => z_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => z_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => z_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => z_oacc_ell_set_dev
+ procedure, pass(a) :: sync_space => z_oacc_ell_sync_space
+ procedure, pass(a) :: sync => z_oacc_ell_sync
+ procedure, pass(a) :: free => z_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_z_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_z_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_z_oacc_ell_mold
+
+ end type psb_z_oacc_ell_sparse_mat
+
+ interface
+ module subroutine psb_z_oacc_ell_mold(a,b,info)
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_ell_mold
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_cp_from_fmt(a,b,info)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_ell_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_mv_from_coo(a,b,info)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_ell_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_mv_from_fmt(a,b,info)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_ell_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_ell_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_ell_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ complex(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_ell_csmm
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_ell_csmv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_scals(d, a, info)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_ell_scals
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_scal(d,a,info,side)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_z_oacc_ell_scal
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_reallocate_nz(nz,a)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_z_oacc_ell_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_allocate_mnnz(m,n,a,nz)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_z_oacc_ell_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_ell_cp_from_coo(a,b,info)
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_ell_cp_from_coo
+ end interface
+
+contains
+
+ subroutine z_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+
+ call a%psb_z_ell_sparse_mat%free()
+
+ return
+ end subroutine z_oacc_ell_free
+
+
+ function z_oacc_ell_sizeof(a) result(res)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+
+ end function z_oacc_ell_sizeof
+
+ subroutine z_oacc_ell_sync_space(a)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call z_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+
+ contains
+ subroutine z_oacc_create_dev(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine z_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+ end subroutine z_oacc_ell_sync_space
+
+
+
+ function z_oacc_ell_is_host(a) result(res)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function z_oacc_ell_is_host
+
+ function z_oacc_ell_is_sync(a) result(res)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function z_oacc_ell_is_sync
+
+ function z_oacc_ell_is_dev(a) result(res)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function z_oacc_ell_is_dev
+
+ subroutine z_oacc_ell_set_host(a)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine z_oacc_ell_set_host
+
+ subroutine z_oacc_ell_set_sync(a)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine z_oacc_ell_set_sync
+
+ subroutine z_oacc_ell_set_dev(a)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine z_oacc_ell_set_dev
+
+ function z_oacc_ell_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'ELL_oacc'
+ end function z_oacc_ell_get_fmt
+
+ subroutine z_oacc_ell_sync(a)
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), target, intent(in) :: a
+ class(psb_z_oacc_ell_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call z_oacc_ell_to_host(a%val)
+ call i_oacc_ell_to_host(a%ja)
+ call i_oacc_ell_to_host_scalar(a%irn)
+ call i_oacc_ell_to_host_scalar(a%idiag)
+ else if (a%is_host()) then
+ call z_oacc_ell_to_dev(a%val)
+ call i_oacc_ell_to_dev(a%ja)
+ call i_oacc_ell_to_dev_scalar(a%irn)
+ call i_oacc_ell_to_dev_scalar(a%idiag)
+ end if
+ call tmpa%set_sync()
+ end subroutine z_oacc_ell_sync
+
+ subroutine z_oacc_ell_to_host(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine z_oacc_ell_to_host
+
+ subroutine z_oacc_ell_to_host_scalar(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine z_oacc_ell_to_host_scalar
+
+ subroutine i_oacc_ell_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev
+
+ subroutine i_oacc_ell_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_ell_to_dev_scalar
+
+ subroutine i_oacc_ell_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:,:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host
+
+ subroutine i_oacc_ell_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_ell_to_host_scalar
+end module psb_z_oacc_ell_mat_mod
diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90
new file mode 100644
index 00000000..e6a4929a
--- /dev/null
+++ b/openacc/psb_z_oacc_hll_mat_mod.F90
@@ -0,0 +1,352 @@
+module psb_z_oacc_hll_mat_mod
+ use iso_c_binding
+ use psb_z_mat_mod
+ use psb_z_hll_mat_mod
+ use psb_z_oacc_vect_mod
+
+ integer(psb_ipk_), parameter, private :: is_host = -1
+ integer(psb_ipk_), parameter, private :: is_sync = 0
+ integer(psb_ipk_), parameter, private :: is_dev = 1
+
+ type, extends(psb_z_hll_sparse_mat) :: psb_z_oacc_hll_sparse_mat
+ integer(psb_ipk_) :: devstate = is_host
+ contains
+ procedure, nopass :: get_fmt => z_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => z_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => z_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => z_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => z_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => z_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => z_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => z_oacc_hll_set_dev
+ procedure, pass(a) :: sync_space => z_oacc_hll_sync_space
+ procedure, pass(a) :: sync => z_oacc_hll_sync
+ procedure, pass(a) :: free => z_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_z_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_z_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_z_oacc_hll_mold
+
+ end type psb_z_oacc_hll_sparse_mat
+
+ interface
+ module subroutine psb_z_oacc_hll_mold(a,b,info)
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ class(psb_z_base_sparse_mat), intent(inout), allocatable :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_hll_mold
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_cp_from_fmt(a,b,info)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_hll_cp_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_mv_from_coo(a,b,info)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_hll_mv_from_coo
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_mv_from_fmt(a,b,info)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_base_sparse_mat), intent(inout) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_hll_mv_from_fmt
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x, y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_hll_vect_mv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ class(psb_z_base_vect_type), intent(inout) :: x,y
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_hll_inner_vect_sv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
+ complex(psb_dpk_), intent(inout) :: y(:,:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_hll_csmm
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: trans
+ end subroutine psb_z_oacc_hll_csmv
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_scals(d, a, info)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_hll_scals
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_scal(d,a,info,side)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ complex(psb_dpk_), intent(in) :: d(:)
+ integer(psb_ipk_), intent(out) :: info
+ character, optional, intent(in) :: side
+ end subroutine psb_z_oacc_hll_scal
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_reallocate_nz(nz,a)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: nz
+ end subroutine psb_z_oacc_hll_reallocate_nz
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_allocate_mnnz(m,n,a,nz)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_), intent(in) :: m,n
+ integer(psb_ipk_), intent(in), optional :: nz
+ end subroutine psb_z_oacc_hll_allocate_mnnz
+ end interface
+
+ interface
+ module subroutine psb_z_oacc_hll_cp_from_coo(a,b,info)
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ class(psb_z_coo_sparse_mat), intent(in) :: b
+ integer(psb_ipk_), intent(out) :: info
+ end subroutine psb_z_oacc_hll_cp_from_coo
+ end interface
+
+contains
+
+ subroutine z_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+
+ if (allocated(a%val)) then
+ !$acc exit data delete(a%val)
+ end if
+ if (allocated(a%ja)) then
+ !$acc exit data delete(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ !$acc exit data delete(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ !$acc exit data delete(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ !$acc exit data delete(a%hkoffs)
+ end if
+
+ call a%psb_z_hll_sparse_mat%free()
+
+ return
+ end subroutine z_oacc_hll_free
+
+ function z_oacc_hll_sizeof(a) result(res)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ integer(psb_epk_) :: res
+
+ if (a%is_dev()) call a%sync()
+
+ res = 8
+ res = res + psb_sizeof_dp * size(a%val)
+ res = res + psb_sizeof_ip * size(a%ja)
+ res = res + psb_sizeof_ip * size(a%irn)
+ res = res + psb_sizeof_ip * size(a%idiag)
+ res = res + psb_sizeof_ip * size(a%hkoffs)
+ end function z_oacc_hll_sizeof
+
+
+
+ function z_oacc_hll_is_host(a) result(res)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_host)
+ end function z_oacc_hll_is_host
+
+ function z_oacc_hll_is_sync(a) result(res)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_sync)
+ end function z_oacc_hll_is_sync
+
+ function z_oacc_hll_is_dev(a) result(res)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
+ logical :: res
+
+ res = (a%devstate == is_dev)
+ end function z_oacc_hll_is_dev
+
+ subroutine z_oacc_hll_set_host(a)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_host
+ end subroutine z_oacc_hll_set_host
+
+ subroutine z_oacc_hll_set_sync(a)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_sync
+ end subroutine z_oacc_hll_set_sync
+
+ subroutine z_oacc_hll_set_dev(a)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+
+ a%devstate = is_dev
+ end subroutine z_oacc_hll_set_dev
+
+ function z_oacc_hll_get_fmt() result(res)
+ implicit none
+ character(len=5) :: res
+ res = 'HLL_oacc'
+ end function z_oacc_hll_get_fmt
+
+ subroutine z_oacc_hll_sync_space(a)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+
+ if (allocated(a%val)) then
+ call z_oacc_create_dev(a%val)
+ end if
+ if (allocated(a%ja)) then
+ call i_oacc_create_dev(a%ja)
+ end if
+ if (allocated(a%irn)) then
+ call i_oacc_create_dev_scalar(a%irn)
+ end if
+ if (allocated(a%idiag)) then
+ call i_oacc_create_dev_scalar(a%idiag)
+ end if
+ if (allocated(a%hkoffs)) then
+ call i_oacc_create_dev_scalar(a%hkoffs)
+ end if
+
+ contains
+ subroutine z_oacc_create_dev(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine z_oacc_create_dev
+
+ subroutine i_oacc_create_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev
+
+ subroutine i_oacc_create_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc enter data copyin(v)
+ end subroutine i_oacc_create_dev_scalar
+
+ end subroutine z_oacc_hll_sync_space
+
+
+ subroutine z_oacc_hll_sync(a)
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), target, intent(in) :: a
+ class(psb_z_oacc_hll_sparse_mat), pointer :: tmpa
+ integer(psb_ipk_) :: info
+
+ tmpa => a
+ if (a%is_dev()) then
+ call z_oacc_hll_to_host(a%val)
+ call i_oacc_hll_to_host(a%ja)
+ call i_oacc_hll_to_host_scalar(a%irn)
+ call i_oacc_hll_to_host_scalar(a%idiag)
+ call i_oacc_hll_to_host_scalar(a%hkoffs)
+ else if (a%is_host()) then
+ call z_oacc_hll_to_dev(a%val)
+ call i_oacc_hll_to_dev(a%ja)
+ call i_oacc_hll_to_dev_scalar(a%irn)
+ call i_oacc_hll_to_dev_scalar(a%idiag)
+ call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ end if
+ call tmpa%set_sync()
+ end subroutine z_oacc_hll_sync
+
+ subroutine z_oacc_hll_to_host(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine z_oacc_hll_to_host
+
+ subroutine z_oacc_hll_to_dev(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine z_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host
+
+ subroutine i_oacc_hll_to_dev(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev
+
+ subroutine i_oacc_hll_to_host_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update self(v)
+ end subroutine i_oacc_hll_to_host_scalar
+
+ subroutine i_oacc_hll_to_dev_scalar(v)
+ implicit none
+ integer(psb_ipk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine i_oacc_hll_to_dev_scalar
+
+
+end module psb_z_oacc_hll_mat_mod
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 5d03b49d..0bac854a 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -42,6 +42,7 @@ module psb_z_oacc_vect_mod
procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf
procedure, pass(x) :: get_size => z_oacc_get_size
+
procedure, pass(x) :: dot_v => z_oacc_vect_dot
procedure, pass(x) :: dot_a => z_oacc_dot_a
procedure, pass(y) :: axpby_v => z_oacc_axpby_v
@@ -70,7 +71,6 @@ module psb_z_oacc_vect_mod
end subroutine z_oacc_mlt_v
end interface
-
interface
subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
@@ -83,7 +83,7 @@ module psb_z_oacc_vect_mod
character(len=1), intent(in), optional :: conjgx, conjgy
end subroutine z_oacc_mlt_v_2
end interface
-
+
contains
subroutine z_oacc_absval1(x)
@@ -432,7 +432,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
if (y%is_host()) call y%sync_space()
!$acc parallel loop
@@ -459,7 +459,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'z_oacc_sctb_x')
return
@@ -475,8 +475,6 @@ contains
call y%set_dev()
end subroutine z_oacc_sctb_x
-
-
subroutine z_oacc_sctb(n, idx, x, beta, y)
use psb_base_mod
implicit none
@@ -498,7 +496,6 @@ contains
call y%set_host()
end subroutine z_oacc_sctb
-
subroutine z_oacc_gthzbuf(i, n, idx, x)
use psb_base_mod
implicit none
@@ -515,7 +512,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'z_oacc_gthzbuf')
return
@@ -542,7 +539,7 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space(info)
+ if (ii%is_host()) call ii%sync_space()
class default
call psb_errpush(info, 'z_oacc_gthzv_x')
return
@@ -577,7 +574,7 @@ contains
select type(vval => val)
type is (psb_z_vect_oacc)
if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space(info)
+ if (virl%is_host()) call virl%sync_space()
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
@@ -591,7 +588,7 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space(info)
+ if (virl%is_dev()) call virl%sync_space()
end select
select type(vval => val)
type is (psb_z_vect_oacc)
@@ -607,8 +604,6 @@ contains
end subroutine z_oacc_ins_v
-
-
subroutine z_oacc_ins_a(n, irl, val, dupl, x, info)
use psi_serial_mod
implicit none
@@ -628,8 +623,6 @@ contains
end subroutine z_oacc_ins_a
-
-
subroutine z_oacc_bld_mn(x, n)
use psb_base_mod
implicit none
@@ -668,7 +661,6 @@ contains
end subroutine z_oacc_bld_x
-
subroutine z_oacc_asb_m(n, x, info)
use psb_base_mod
implicit none
@@ -696,8 +688,6 @@ contains
end if
end subroutine z_oacc_asb_m
-
-
subroutine z_oacc_set_scal(x, val, first, last)
class(psb_z_vect_oacc), intent(inout) :: x
complex(psb_dpk_), intent(in) :: val
@@ -718,8 +708,6 @@ contains
call x%set_dev()
end subroutine z_oacc_set_scal
-
-
subroutine z_oacc_zero(x)
use psi_serial_mod
implicit none
@@ -743,6 +731,7 @@ contains
end function z_oacc_get_fmt
+
function z_oacc_vect_dot(n, x, y) result(res)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
@@ -776,9 +765,6 @@ contains
end function z_oacc_vect_dot
-
-
-
function z_oacc_dot_a(n, x, y) result(res)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
@@ -910,7 +896,6 @@ contains
end if
end subroutine z_oacc_vect_all
-
subroutine z_oacc_vect_free(x, info)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
From 2709aa9f16b6b1614ecea1a3516d7d7a91651c80 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 8 Aug 2024 11:10:04 +0200
Subject: [PATCH 28/86] Fix upd_xyz name
---
openacc/psb_c_oacc_vect_mod.F90 | 6 +++---
openacc/psb_d_oacc_vect_mod.F90 | 6 +++---
openacc/psb_s_oacc_vect_mod.F90 | 6 +++---
openacc/psb_z_oacc_vect_mod.F90 | 6 +++---
4 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 9225f159..fc501e04 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -47,7 +47,7 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: dot_a => c_oacc_dot_a
procedure, pass(y) :: axpby_v => c_oacc_axpby_v
procedure, pass(y) :: axpby_a => c_oacc_axpby_a
- procedure, pass(z) :: abgdxyz => c_oacc_abgdxyz
+ procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz
procedure, pass(y) :: mlt_a => c_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2
procedure, pass(y) :: mlt_v => c_oacc_mlt_v
@@ -364,7 +364,7 @@ contains
call y%set_host()
end subroutine c_oacc_axpby_a
- subroutine c_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ subroutine c_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -414,7 +414,7 @@ contains
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
- end subroutine c_oacc_abgdxyz
+ end subroutine c_oacc_upd_xyz
subroutine c_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 0dff0f27..bfb97b5c 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -47,7 +47,7 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: dot_a => d_oacc_dot_a
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
procedure, pass(y) :: axpby_a => d_oacc_axpby_a
- procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
+ procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
procedure, pass(y) :: mlt_v => d_oacc_mlt_v
@@ -364,7 +364,7 @@ contains
call y%set_host()
end subroutine d_oacc_axpby_a
- subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ subroutine d_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -414,7 +414,7 @@ contains
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
- end subroutine d_oacc_abgdxyz
+ end subroutine d_oacc_upd_xyz
subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 5c34827d..47922d6a 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -47,7 +47,7 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: dot_a => s_oacc_dot_a
procedure, pass(y) :: axpby_v => s_oacc_axpby_v
procedure, pass(y) :: axpby_a => s_oacc_axpby_a
- procedure, pass(z) :: abgdxyz => s_oacc_abgdxyz
+ procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz
procedure, pass(y) :: mlt_a => s_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2
procedure, pass(y) :: mlt_v => s_oacc_mlt_v
@@ -364,7 +364,7 @@ contains
call y%set_host()
end subroutine s_oacc_axpby_a
- subroutine s_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ subroutine s_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -414,7 +414,7 @@ contains
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
- end subroutine s_oacc_abgdxyz
+ end subroutine s_oacc_upd_xyz
subroutine s_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 0bac854a..be03b1cd 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -47,7 +47,7 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: dot_a => z_oacc_dot_a
procedure, pass(y) :: axpby_v => z_oacc_axpby_v
procedure, pass(y) :: axpby_a => z_oacc_axpby_a
- procedure, pass(z) :: abgdxyz => z_oacc_abgdxyz
+ procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz
procedure, pass(y) :: mlt_a => z_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2
procedure, pass(y) :: mlt_v => z_oacc_mlt_v
@@ -364,7 +364,7 @@ contains
call y%set_host()
end subroutine z_oacc_axpby_a
- subroutine z_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info)
+ subroutine z_oacc_upd_xyz(m, alpha, beta, gamma, delta, x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -414,7 +414,7 @@ contains
call y%axpby(m, alpha, x, beta, info)
call z%axpby(m, gamma, y, delta, info)
end if
- end subroutine z_oacc_abgdxyz
+ end subroutine z_oacc_upd_xyz
subroutine z_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
From cf2cc6cab9af7178ba4da7d30a7ee47a3adc7044 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 8 Aug 2024 14:34:28 +0200
Subject: [PATCH 29/86] Precedence of oacc_vect modules
---
openacc/Makefile | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/openacc/Makefile b/openacc/Makefile
index 035244d9..cdcc9f71 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -58,8 +58,9 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_env_mod.o
-psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
- psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o
+psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o\
+ psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o
+psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o
psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o
From 82201407290cdbd35af47cbaae9f9ca7bf8d3a09 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 8 Aug 2024 14:59:09 +0200
Subject: [PATCH 30/86] Merged recent changes from development
---
base/modules/auxil/psi_c_serial_mod.f90 | 8 ++++----
base/modules/auxil/psi_d_serial_mod.f90 | 8 ++++----
base/modules/auxil/psi_e_serial_mod.f90 | 8 ++++----
base/modules/auxil/psi_i2_serial_mod.f90 | 8 ++++----
base/modules/auxil/psi_m_serial_mod.f90 | 8 ++++----
base/modules/auxil/psi_s_serial_mod.f90 | 8 ++++----
base/modules/auxil/psi_z_serial_mod.f90 | 8 ++++----
base/modules/psblas/psb_c_psblas_mod.F90 | 8 ++++----
base/modules/psblas/psb_d_psblas_mod.F90 | 8 ++++----
base/modules/psblas/psb_s_psblas_mod.F90 | 8 ++++----
base/modules/psblas/psb_z_psblas_mod.F90 | 8 ++++----
base/modules/serial/psb_c_base_vect_mod.F90 | 14 +++++++-------
base/modules/serial/psb_c_vect_mod.F90 | 9 +++++----
base/modules/serial/psb_d_base_vect_mod.F90 | 14 +++++++-------
base/modules/serial/psb_d_vect_mod.F90 | 9 +++++----
base/modules/serial/psb_s_base_vect_mod.F90 | 14 +++++++-------
base/modules/serial/psb_s_vect_mod.F90 | 9 +++++----
base/modules/serial/psb_z_base_vect_mod.F90 | 14 +++++++-------
base/modules/serial/psb_z_vect_mod.F90 | 9 +++++----
base/psblas/psb_caxpby.f90 | 8 ++++----
base/psblas/psb_daxpby.f90 | 8 ++++----
base/psblas/psb_saxpby.f90 | 8 ++++----
base/psblas/psb_zaxpby.f90 | 8 ++++----
base/serial/psi_c_serial_impl.F90 | 8 ++++----
base/serial/psi_d_serial_impl.F90 | 8 ++++----
base/serial/psi_e_serial_impl.F90 | 8 ++++----
base/serial/psi_i2_serial_impl.F90 | 8 ++++----
base/serial/psi_m_serial_impl.F90 | 8 ++++----
base/serial/psi_s_serial_impl.F90 | 8 ++++----
base/serial/psi_z_serial_impl.F90 | 8 ++++----
cuda/psb_c_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_c_vectordev_mod.F90 | 8 ++++----
cuda/psb_d_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_d_vectordev_mod.F90 | 8 ++++----
cuda/psb_s_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_s_vectordev_mod.F90 | 8 ++++----
cuda/psb_z_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_z_vectordev_mod.F90 | 8 ++++----
38 files changed, 168 insertions(+), 164 deletions(-)
diff --git a/base/modules/auxil/psi_c_serial_mod.f90 b/base/modules/auxil/psi_c_serial_mod.f90
index 38b740a7..3fe001c8 100644
--- a/base/modules/auxil/psi_c_serial_mod.f90
+++ b/base/modules/auxil/psi_c_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_c_serial_mod
end subroutine psi_caxpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_c_serial_mod
complex(psb_spk_), intent (inout) :: z(:)
complex(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_cabgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_c_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_d_serial_mod.f90 b/base/modules/auxil/psi_d_serial_mod.f90
index 1d65c5f6..a08263df 100644
--- a/base/modules/auxil/psi_d_serial_mod.f90
+++ b/base/modules/auxil/psi_d_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_d_serial_mod
end subroutine psi_daxpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_d_serial_mod
real(psb_dpk_), intent (inout) :: z(:)
real(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_dabgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_d_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_e_serial_mod.f90 b/base/modules/auxil/psi_e_serial_mod.f90
index 6f4e8c06..1f1bebd7 100644
--- a/base/modules/auxil/psi_e_serial_mod.f90
+++ b/base/modules/auxil/psi_e_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_e_serial_mod
end subroutine psi_eaxpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_e_serial_mod
integer(psb_epk_), intent (inout) :: z(:)
integer(psb_epk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_eabgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_e_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_i2_serial_mod.f90 b/base/modules/auxil/psi_i2_serial_mod.f90
index ffa14059..770d3256 100644
--- a/base/modules/auxil/psi_i2_serial_mod.f90
+++ b/base/modules/auxil/psi_i2_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_i2_serial_mod
end subroutine psi_i2axpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_i2_serial_mod
integer(psb_i2pk_), intent (inout) :: z(:)
integer(psb_i2pk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_i2abgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_i2_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_m_serial_mod.f90 b/base/modules/auxil/psi_m_serial_mod.f90
index 5661fdbf..3583cccc 100644
--- a/base/modules/auxil/psi_m_serial_mod.f90
+++ b/base/modules/auxil/psi_m_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_m_serial_mod
end subroutine psi_maxpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_lpk_,psb_mpk_, psb_epk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_m_serial_mod
integer(psb_mpk_), intent (inout) :: z(:)
integer(psb_mpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_mabgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_m_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_s_serial_mod.f90 b/base/modules/auxil/psi_s_serial_mod.f90
index 5cc17d58..3e0c6d91 100644
--- a/base/modules/auxil/psi_s_serial_mod.f90
+++ b/base/modules/auxil/psi_s_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_s_serial_mod
end subroutine psi_saxpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_spk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_s_serial_mod
real(psb_spk_), intent (inout) :: z(:)
real(psb_spk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_sabgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_s_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/auxil/psi_z_serial_mod.f90 b/base/modules/auxil/psi_z_serial_mod.f90
index 8a3f053d..a8ea734e 100644
--- a/base/modules/auxil/psi_z_serial_mod.f90
+++ b/base/modules/auxil/psi_z_serial_mod.f90
@@ -99,8 +99,8 @@ module psi_z_serial_mod
end subroutine psi_zaxpbyv2
end interface psb_geaxpby
- interface psi_abgdxyz
- subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ interface psi_upd_xyz
+ subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
import :: psb_ipk_, psb_dpk_
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -109,8 +109,8 @@ module psi_z_serial_mod
complex(psb_dpk_), intent (inout) :: z(:)
complex(psb_dpk_), intent (in) :: alpha, beta,gamma,delta
integer(psb_ipk_), intent(out) :: info
- end subroutine psi_zabgdxyz
- end interface psi_abgdxyz
+ end subroutine psi_z_upd_xyz
+ end interface psi_upd_xyz
interface psi_xyzw
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
diff --git a/base/modules/psblas/psb_c_psblas_mod.F90 b/base/modules/psblas/psb_c_psblas_mod.F90
index 7f7f937c..591dec09 100644
--- a/base/modules/psblas/psb_c_psblas_mod.F90
+++ b/base/modules/psblas/psb_c_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_c_psblas_mod
end subroutine psb_caxpby
end interface
- interface psb_abgdxyz
- subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_upd_xyz
+ subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_c_vect_type, psb_cspmat_type
@@ -154,8 +154,8 @@ module psb_c_psblas_mod
complex(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_cabgdxyz_vect
- end interface psb_abgdxyz
+ end subroutine psb_c_upd_xyz_vect
+ end interface psb_upd_xyz
interface psb_geamax
function psb_camax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_d_psblas_mod.F90 b/base/modules/psblas/psb_d_psblas_mod.F90
index 12090956..b200bc8a 100644
--- a/base/modules/psblas/psb_d_psblas_mod.F90
+++ b/base/modules/psblas/psb_d_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_d_psblas_mod
end subroutine psb_daxpby
end interface
- interface psb_abgdxyz
- subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_upd_xyz
+ subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_d_vect_type, psb_dspmat_type
@@ -154,8 +154,8 @@ module psb_d_psblas_mod
real(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_dabgdxyz_vect
- end interface psb_abgdxyz
+ end subroutine psb_d_upd_xyz_vect
+ end interface psb_upd_xyz
interface psb_geamax
function psb_damax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_s_psblas_mod.F90 b/base/modules/psblas/psb_s_psblas_mod.F90
index 7a7ce783..a60da025 100644
--- a/base/modules/psblas/psb_s_psblas_mod.F90
+++ b/base/modules/psblas/psb_s_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_s_psblas_mod
end subroutine psb_saxpby
end interface
- interface psb_abgdxyz
- subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_upd_xyz
+ subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_spk_, psb_ipk_, &
& psb_s_vect_type, psb_sspmat_type
@@ -154,8 +154,8 @@ module psb_s_psblas_mod
real(psb_spk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_sabgdxyz_vect
- end interface psb_abgdxyz
+ end subroutine psb_s_upd_xyz_vect
+ end interface psb_upd_xyz
interface psb_geamax
function psb_samax(x, desc_a, info, jx,global)
diff --git a/base/modules/psblas/psb_z_psblas_mod.F90 b/base/modules/psblas/psb_z_psblas_mod.F90
index bcfe9caa..241df2b9 100644
--- a/base/modules/psblas/psb_z_psblas_mod.F90
+++ b/base/modules/psblas/psb_z_psblas_mod.F90
@@ -143,8 +143,8 @@ module psb_z_psblas_mod
end subroutine psb_zaxpby
end interface
- interface psb_abgdxyz
- subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+ interface psb_upd_xyz
+ subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
import :: psb_desc_type, psb_dpk_, psb_ipk_, &
& psb_z_vect_type, psb_zspmat_type
@@ -154,8 +154,8 @@ module psb_z_psblas_mod
complex(psb_dpk_), intent (in) :: alpha, beta, gamma, delta
type(psb_desc_type), intent (in) :: desc_a
integer(psb_ipk_), intent(out) :: info
- end subroutine psb_zabgdxyz_vect
- end interface psb_abgdxyz
+ end subroutine psb_z_upd_xyz_vect
+ end interface psb_upd_xyz
interface psb_geamax
function psb_zamax(x, desc_a, info, jx,global)
diff --git a/base/modules/serial/psb_c_base_vect_mod.F90 b/base/modules/serial/psb_c_base_vect_mod.F90
index 41bab5ab..4dac86d6 100644
--- a/base/modules/serial/psb_c_base_vect_mod.F90
+++ b/base/modules/serial/psb_c_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_c_base_vect_mod
procedure, pass(z) :: axpby_v2 => c_base_axpby_v2
procedure, pass(z) :: axpby_a2 => c_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => c_base_abgdxyz
+ procedure, pass(z) :: upd_xyz => c_base_upd_xyz
procedure, pass(w) :: xyzw => c_base_xyzw
!
@@ -1130,12 +1130,12 @@ contains
end subroutine c_base_axpby_a2
!
- ! ABGDXYZ is invoked via Z, hence the structure below.
+ ! UPD_XYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_abgdxyz
+ !> Function base_upd_xyz
!! \memberof psb_c_base_vect_type
- !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine c_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine c_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=czero)) call x%sync()
if (y%is_dev().and.(beta/=czero)) call y%sync()
if (z%is_dev().and.(delta/=czero)) call z%sync()
- call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine c_base_abgdxyz
+ end subroutine c_base_upd_xyz
subroutine c_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90
index faf1733f..1e9510f2 100644
--- a/base/modules/serial/psb_c_vect_mod.F90
+++ b/base/modules/serial/psb_c_vect_mod.F90
@@ -102,8 +102,9 @@ module psb_c_vect_mod
procedure, pass(z) :: axpby_v2 => c_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => c_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => c_vect_abgdxyz
+ procedure, pass(z) :: upd_xyz => c_vect_upd_xyz
procedure, pass(z) :: xyzw => c_vect_xyzw
+
procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2
@@ -773,7 +774,7 @@ contains
end subroutine c_vect_axpby_a2
- subroutine c_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine c_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -784,9 +785,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine c_vect_abgdxyz
+ end subroutine c_vect_upd_xyz
subroutine c_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_d_base_vect_mod.F90 b/base/modules/serial/psb_d_base_vect_mod.F90
index 1ad1ffa5..f07b5aed 100644
--- a/base/modules/serial/psb_d_base_vect_mod.F90
+++ b/base/modules/serial/psb_d_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_d_base_vect_mod
procedure, pass(z) :: axpby_v2 => d_base_axpby_v2
procedure, pass(z) :: axpby_a2 => d_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => d_base_abgdxyz
+ procedure, pass(z) :: upd_xyz => d_base_upd_xyz
procedure, pass(w) :: xyzw => d_base_xyzw
!
@@ -1137,12 +1137,12 @@ contains
end subroutine d_base_axpby_a2
!
- ! ABGDXYZ is invoked via Z, hence the structure below.
+ ! UPD_XYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_abgdxyz
+ !> Function base_upd_xyz
!! \memberof psb_d_base_vect_type
- !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine d_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine d_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=dzero)) call x%sync()
if (y%is_dev().and.(beta/=dzero)) call y%sync()
if (z%is_dev().and.(delta/=dzero)) call z%sync()
- call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine d_base_abgdxyz
+ end subroutine d_base_upd_xyz
subroutine d_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90
index a54ddfdb..ae3062dd 100644
--- a/base/modules/serial/psb_d_vect_mod.F90
+++ b/base/modules/serial/psb_d_vect_mod.F90
@@ -102,8 +102,9 @@ module psb_d_vect_mod
procedure, pass(z) :: axpby_v2 => d_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => d_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => d_vect_abgdxyz
+ procedure, pass(z) :: upd_xyz => d_vect_upd_xyz
procedure, pass(z) :: xyzw => d_vect_xyzw
+
procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2
@@ -780,7 +781,7 @@ contains
end subroutine d_vect_axpby_a2
- subroutine d_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine d_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -791,9 +792,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine d_vect_abgdxyz
+ end subroutine d_vect_upd_xyz
subroutine d_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_s_base_vect_mod.F90 b/base/modules/serial/psb_s_base_vect_mod.F90
index 26b82c31..596cd634 100644
--- a/base/modules/serial/psb_s_base_vect_mod.F90
+++ b/base/modules/serial/psb_s_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_s_base_vect_mod
procedure, pass(z) :: axpby_v2 => s_base_axpby_v2
procedure, pass(z) :: axpby_a2 => s_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => s_base_abgdxyz
+ procedure, pass(z) :: upd_xyz => s_base_upd_xyz
procedure, pass(w) :: xyzw => s_base_xyzw
!
@@ -1137,12 +1137,12 @@ contains
end subroutine s_base_axpby_a2
!
- ! ABGDXYZ is invoked via Z, hence the structure below.
+ ! UPD_XYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_abgdxyz
+ !> Function base_upd_xyz
!! \memberof psb_s_base_vect_type
- !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1153,7 +1153,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine s_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine s_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1166,11 +1166,11 @@ contains
if (x%is_dev().and.(alpha/=szero)) call x%sync()
if (y%is_dev().and.(beta/=szero)) call y%sync()
if (z%is_dev().and.(delta/=szero)) call z%sync()
- call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine s_base_abgdxyz
+ end subroutine s_base_upd_xyz
subroutine s_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90
index 901c6103..cad4659c 100644
--- a/base/modules/serial/psb_s_vect_mod.F90
+++ b/base/modules/serial/psb_s_vect_mod.F90
@@ -102,8 +102,9 @@ module psb_s_vect_mod
procedure, pass(z) :: axpby_v2 => s_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => s_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => s_vect_abgdxyz
+ procedure, pass(z) :: upd_xyz => s_vect_upd_xyz
procedure, pass(z) :: xyzw => s_vect_xyzw
+
procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2
@@ -780,7 +781,7 @@ contains
end subroutine s_vect_axpby_a2
- subroutine s_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine s_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -791,9 +792,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine s_vect_abgdxyz
+ end subroutine s_vect_upd_xyz
subroutine s_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_z_base_vect_mod.F90 b/base/modules/serial/psb_z_base_vect_mod.F90
index a3afc9c1..1bbdfba1 100644
--- a/base/modules/serial/psb_z_base_vect_mod.F90
+++ b/base/modules/serial/psb_z_base_vect_mod.F90
@@ -155,7 +155,7 @@ module psb_z_base_vect_mod
procedure, pass(z) :: axpby_v2 => z_base_axpby_v2
procedure, pass(z) :: axpby_a2 => z_base_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => z_base_abgdxyz
+ procedure, pass(z) :: upd_xyz => z_base_upd_xyz
procedure, pass(w) :: xyzw => z_base_xyzw
!
@@ -1130,12 +1130,12 @@ contains
end subroutine z_base_axpby_a2
!
- ! ABGDXYZ is invoked via Z, hence the structure below.
+ ! UPD_XYZ is invoked via Z, hence the structure below.
!
!
- !> Function base_abgdxyz
+ !> Function base_upd_xyz
!! \memberof psb_z_base_vect_type
- !! \brief ABGDXYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
+ !! \brief UPD_XYZ combines two AXPBYS y=alpha*x+beta*y, z=gamma*y+delta*zeta
!! \param m Number of entries to be considered
!! \param alpha scalar alpha
!! \param beta scalar beta
@@ -1146,7 +1146,7 @@ contains
!! \param z The class(base_vect) to be added
!! \param info return code
!!
- subroutine z_base_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine z_base_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -1159,11 +1159,11 @@ contains
if (x%is_dev().and.(alpha/=zzero)) call x%sync()
if (y%is_dev().and.(beta/=zzero)) call y%sync()
if (z%is_dev().and.(delta/=zzero)) call z%sync()
- call psi_abgdxyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
+ call psi_upd_xyz(m,alpha, beta, gamma,delta,x%v, y%v, z%v, info)
call y%set_host()
call z%set_host()
- end subroutine z_base_abgdxyz
+ end subroutine z_base_upd_xyz
subroutine z_base_xyzw(m,a,b,c,d,e,f,x, y, z, w,info)
use psi_serial_mod
diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90
index 8235d1d4..48f2e947 100644
--- a/base/modules/serial/psb_z_vect_mod.F90
+++ b/base/modules/serial/psb_z_vect_mod.F90
@@ -102,8 +102,9 @@ module psb_z_vect_mod
procedure, pass(z) :: axpby_v2 => z_vect_axpby_v2
procedure, pass(z) :: axpby_a2 => z_vect_axpby_a2
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
- procedure, pass(z) :: abgdxyz => z_vect_abgdxyz
+ procedure, pass(z) :: upd_xyz => z_vect_upd_xyz
procedure, pass(z) :: xyzw => z_vect_xyzw
+
procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2
@@ -773,7 +774,7 @@ contains
end subroutine z_vect_axpby_a2
- subroutine z_vect_abgdxyz(m,alpha,beta,gamma,delta,x, y, z, info)
+ subroutine z_vect_upd_xyz(m,alpha,beta,gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -784,9 +785,9 @@ contains
integer(psb_ipk_), intent(out) :: info
if (allocated(z%v)) &
- call z%v%abgdxyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
+ call z%v%upd_xyz(m,alpha,beta,gamma,delta,x%v,y%v,info)
- end subroutine z_vect_abgdxyz
+ end subroutine z_vect_upd_xyz
subroutine z_vect_xyzw(m,a,b,c,d,e,f,x, y, z, w, info)
use psi_serial_mod
diff --git a/base/psblas/psb_caxpby.f90 b/base/psblas/psb_caxpby.f90
index 7c22bb06..5d80ef00 100644
--- a/base/psblas/psb_caxpby.f90
+++ b/base/psblas/psb_caxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_caddconst_vect(x,b,z,desc_a,info)
end subroutine psb_caddconst_vect
-subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_c_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_cabgdxyz_vect
+ use psb_base_mod, psb_protect_name => psb_c_upd_xyz_vect
implicit none
type(psb_c_vect_type), intent (inout) :: x
type(psb_c_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_cabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_cabgdxyz_vect
+end subroutine psb_c_upd_xyz_vect
diff --git a/base/psblas/psb_daxpby.f90 b/base/psblas/psb_daxpby.f90
index 1de77647..38ebe465 100644
--- a/base/psblas/psb_daxpby.f90
+++ b/base/psblas/psb_daxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_daddconst_vect(x,b,z,desc_a,info)
end subroutine psb_daddconst_vect
-subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_d_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_dabgdxyz_vect
+ use psb_base_mod, psb_protect_name => psb_d_upd_xyz_vect
implicit none
type(psb_d_vect_type), intent (inout) :: x
type(psb_d_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_dabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_dabgdxyz_vect
+end subroutine psb_d_upd_xyz_vect
diff --git a/base/psblas/psb_saxpby.f90 b/base/psblas/psb_saxpby.f90
index 1b1f24e6..0055fdbe 100644
--- a/base/psblas/psb_saxpby.f90
+++ b/base/psblas/psb_saxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_saddconst_vect(x,b,z,desc_a,info)
end subroutine psb_saddconst_vect
-subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_s_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_sabgdxyz_vect
+ use psb_base_mod, psb_protect_name => psb_s_upd_xyz_vect
implicit none
type(psb_s_vect_type), intent (inout) :: x
type(psb_s_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_sabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_sabgdxyz_vect
+end subroutine psb_s_upd_xyz_vect
diff --git a/base/psblas/psb_zaxpby.f90 b/base/psblas/psb_zaxpby.f90
index 0f37a1f4..e93488e3 100644
--- a/base/psblas/psb_zaxpby.f90
+++ b/base/psblas/psb_zaxpby.f90
@@ -743,9 +743,9 @@ subroutine psb_zaddconst_vect(x,b,z,desc_a,info)
end subroutine psb_zaddconst_vect
-subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
+subroutine psb_z_upd_xyz_vect(alpha, beta, gamma, delta, x, y, z,&
& desc_a, info)
- use psb_base_mod, psb_protect_name => psb_zabgdxyz_vect
+ use psb_base_mod, psb_protect_name => psb_z_upd_xyz_vect
implicit none
type(psb_z_vect_type), intent (inout) :: x
type(psb_z_vect_type), intent (inout) :: y
@@ -812,7 +812,7 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
end if
if(desc_a%get_local_rows() > 0) then
- call z%abgdxyz(nr,alpha,beta,gamma,delta,x,y,info)
+ call z%upd_xyz(nr,alpha,beta,gamma,delta,x,y,info)
end if
call psb_erractionrestore(err_act)
@@ -822,5 +822,5 @@ subroutine psb_zabgdxyz_vect(alpha, beta, gamma, delta, x, y, z,&
return
-end subroutine psb_zabgdxyz_vect
+end subroutine psb_z_upd_xyz_vect
diff --git a/base/serial/psi_c_serial_impl.F90 b/base/serial/psi_c_serial_impl.F90
index e230a1e0..e3f1d9a3 100644
--- a/base/serial/psi_c_serial_impl.F90
+++ b/base/serial/psi_c_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine caxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine caxpbyv2
-subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_c_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='cabgdxyz'
+ name='c_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_cabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_cabgdxyz
+end subroutine psi_c_upd_xyz
subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_cxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='cabgdxyz'
+ name='c_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_d_serial_impl.F90 b/base/serial/psi_d_serial_impl.F90
index bf1b2917..d6a9a31d 100644
--- a/base/serial/psi_d_serial_impl.F90
+++ b/base/serial/psi_d_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine daxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine daxpbyv2
-subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_d_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='dabgdxyz'
+ name='d_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_dabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_dabgdxyz
+end subroutine psi_d_upd_xyz
subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_dxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='dabgdxyz'
+ name='d_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_e_serial_impl.F90 b/base/serial/psi_e_serial_impl.F90
index 911ab4ec..37b11a94 100644
--- a/base/serial/psi_e_serial_impl.F90
+++ b/base/serial/psi_e_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine eaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine eaxpbyv2
-subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_e_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='eabgdxyz'
+ name='e_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_eabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_eabgdxyz
+end subroutine psi_e_upd_xyz
subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_exyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='eabgdxyz'
+ name='e_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_i2_serial_impl.F90 b/base/serial/psi_i2_serial_impl.F90
index fb42dfcd..c20cd60b 100644
--- a/base/serial/psi_i2_serial_impl.F90
+++ b/base/serial/psi_i2_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine i2axpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine i2axpbyv2
-subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_i2_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='i2abgdxyz'
+ name='i2_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_i2abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_i2abgdxyz
+end subroutine psi_i2_upd_xyz
subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_i2xyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='i2abgdxyz'
+ name='i2_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_m_serial_impl.F90 b/base/serial/psi_m_serial_impl.F90
index 346fd897..55913a16 100644
--- a/base/serial/psi_m_serial_impl.F90
+++ b/base/serial/psi_m_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine maxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine maxpbyv2
-subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_m_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='mabgdxyz'
+ name='m_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_mabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_mabgdxyz
+end subroutine psi_m_upd_xyz
subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_mxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='mabgdxyz'
+ name='m_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_s_serial_impl.F90 b/base/serial/psi_s_serial_impl.F90
index 52f86bcd..c3846c8e 100644
--- a/base/serial/psi_s_serial_impl.F90
+++ b/base/serial/psi_s_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine saxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine saxpbyv2
-subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_s_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='sabgdxyz'
+ name='s_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_sabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_sabgdxyz
+end subroutine psi_s_upd_xyz
subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_sxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='sabgdxyz'
+ name='s_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/base/serial/psi_z_serial_impl.F90 b/base/serial/psi_z_serial_impl.F90
index 7e680273..763eae22 100644
--- a/base/serial/psi_z_serial_impl.F90
+++ b/base/serial/psi_z_serial_impl.F90
@@ -1568,7 +1568,7 @@ subroutine zaxpbyv2(m, n, alpha, X, lldx, beta, Y, lldy, Z, lldz, info)
end subroutine zaxpbyv2
-subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+subroutine psi_z_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psb_const_mod
use psb_error_mod
implicit none
@@ -1582,7 +1582,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='zabgdxyz'
+ name='z_upd_xyz'
info = psb_success_
if (m.lt.0) then
@@ -1791,7 +1791,7 @@ subroutine psi_zabgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
call fcpsb_serror()
return
-end subroutine psi_zabgdxyz
+end subroutine psi_z_upd_xyz
subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
use psb_const_mod
@@ -1808,7 +1808,7 @@ subroutine psi_zxyzw(m,a,b,c,d,e,f,x, y, z,w, info)
integer(psb_ipk_) :: i
integer(psb_ipk_) :: int_err(5)
character name*20
- name='zabgdxyz'
+ name='z_xyzw'
info = psb_success_
if (m.lt.0) then
diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90
index 2c2a4f61..45fafe0a 100644
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@@ -90,7 +90,7 @@ module psb_c_cuda_vect_mod
procedure, pass(x) :: dot_a => c_cuda_dot_a
procedure, pass(y) :: axpby_v => c_cuda_axpby_v
procedure, pass(y) :: axpby_a => c_cuda_axpby_a
- procedure, pass(z) :: abgdxyz => c_cuda_abgdxyz
+ procedure, pass(z) :: upd_xyz => c_cuda_upd_xyz
procedure, pass(y) :: mlt_v => c_cuda_mlt_v
procedure, pass(y) :: mlt_a => c_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => c_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine c_cuda_axpby_v
- subroutine c_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine c_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx d_cuda_dot_a
procedure, pass(y) :: axpby_v => d_cuda_axpby_v
procedure, pass(y) :: axpby_a => d_cuda_axpby_a
- procedure, pass(z) :: abgdxyz => d_cuda_abgdxyz
+ procedure, pass(z) :: upd_xyz => d_cuda_upd_xyz
procedure, pass(y) :: mlt_v => d_cuda_mlt_v
procedure, pass(y) :: mlt_a => d_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => d_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine d_cuda_axpby_v
- subroutine d_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine d_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx s_cuda_dot_a
procedure, pass(y) :: axpby_v => s_cuda_axpby_v
procedure, pass(y) :: axpby_a => s_cuda_axpby_a
- procedure, pass(z) :: abgdxyz => s_cuda_abgdxyz
+ procedure, pass(z) :: upd_xyz => s_cuda_upd_xyz
procedure, pass(y) :: mlt_v => s_cuda_mlt_v
procedure, pass(y) :: mlt_a => s_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => s_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine s_cuda_axpby_v
- subroutine s_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine s_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx z_cuda_dot_a
procedure, pass(y) :: axpby_v => z_cuda_axpby_v
procedure, pass(y) :: axpby_a => z_cuda_axpby_a
- procedure, pass(z) :: abgdxyz => z_cuda_abgdxyz
+ procedure, pass(z) :: upd_xyz => z_cuda_upd_xyz
procedure, pass(y) :: mlt_v => z_cuda_mlt_v
procedure, pass(y) :: mlt_a => z_cuda_mlt_a
procedure, pass(z) :: mlt_a_2 => z_cuda_mlt_a_2
@@ -912,7 +912,7 @@ contains
end subroutine z_cuda_axpby_v
- subroutine z_cuda_abgdxyz(m,alpha, beta, gamma,delta,x, y, z, info)
+ subroutine z_cuda_upd_xyz(m,alpha, beta, gamma,delta,x, y, z, info)
use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
@@ -946,7 +946,7 @@ contains
if ((nx
Date: Thu, 8 Aug 2024 15:08:45 +0200
Subject: [PATCH 31/86] Fix missing method in oacc_ell
---
openacc/psb_c_oacc_ell_mat_mod.F90 | 12 ++++++++++++
openacc/psb_d_oacc_ell_mat_mod.F90 | 12 ++++++++++++
openacc/psb_s_oacc_ell_mat_mod.F90 | 12 ++++++++++++
openacc/psb_z_oacc_ell_mat_mod.F90 | 12 ++++++++++++
4 files changed, 48 insertions(+)
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
index 102d41c5..5e5dc302 100644
--- a/openacc/psb_c_oacc_ell_mat_mod.F90
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -303,6 +303,18 @@ contains
call tmpa%set_sync()
end subroutine c_oacc_ell_sync
+ subroutine c_oacc_ell_to_dev_scalar(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine c_oacc_ell_to_dev_scalar
+
+ subroutine c_oacc_ell_to_dev(v)
+ implicit none
+ complex(psb_spk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine c_oacc_ell_to_dev
+
subroutine c_oacc_ell_to_host(v)
implicit none
complex(psb_spk_), intent(in) :: v(:,:)
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 042c0ff3..962ad2db 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -303,6 +303,18 @@ contains
call tmpa%set_sync()
end subroutine d_oacc_ell_sync
+ subroutine d_oacc_ell_to_dev_scalar(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine d_oacc_ell_to_dev_scalar
+
+ subroutine d_oacc_ell_to_dev(v)
+ implicit none
+ real(psb_dpk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine d_oacc_ell_to_dev
+
subroutine d_oacc_ell_to_host(v)
implicit none
real(psb_dpk_), intent(in) :: v(:,:)
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
index 541fdf9a..9924ba77 100644
--- a/openacc/psb_s_oacc_ell_mat_mod.F90
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -303,6 +303,18 @@ contains
call tmpa%set_sync()
end subroutine s_oacc_ell_sync
+ subroutine s_oacc_ell_to_dev_scalar(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine s_oacc_ell_to_dev_scalar
+
+ subroutine s_oacc_ell_to_dev(v)
+ implicit none
+ real(psb_spk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine s_oacc_ell_to_dev
+
subroutine s_oacc_ell_to_host(v)
implicit none
real(psb_spk_), intent(in) :: v(:,:)
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
index 8bf8c9fa..eb8884d6 100644
--- a/openacc/psb_z_oacc_ell_mat_mod.F90
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -303,6 +303,18 @@ contains
call tmpa%set_sync()
end subroutine z_oacc_ell_sync
+ subroutine z_oacc_ell_to_dev_scalar(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:)
+ !$acc update device(v)
+ end subroutine z_oacc_ell_to_dev_scalar
+
+ subroutine z_oacc_ell_to_dev(v)
+ implicit none
+ complex(psb_dpk_), intent(in) :: v(:,:)
+ !$acc update device(v)
+ end subroutine z_oacc_ell_to_dev
+
subroutine z_oacc_ell_to_host(v)
implicit none
complex(psb_dpk_), intent(in) :: v(:,:)
From fa5e7ff9455730ec6e5a07ad5fa6a9018eeac6a3 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 20 Aug 2024 19:38:34 +0200
Subject: [PATCH 32/86] Fixes for vector methods and sync()
---
openacc/impl/psb_c_oacc_mlt_v.f90 | 29 ++-
openacc/impl/psb_c_oacc_mlt_v_2.f90 | 98 +++++-----
openacc/impl/psb_d_oacc_mlt_v.f90 | 29 ++-
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 98 +++++-----
openacc/impl/psb_s_oacc_mlt_v.f90 | 29 ++-
openacc/impl/psb_s_oacc_mlt_v_2.f90 | 98 +++++-----
openacc/impl/psb_z_oacc_mlt_v.f90 | 29 ++-
openacc/impl/psb_z_oacc_mlt_v_2.f90 | 98 +++++-----
openacc/psb_c_oacc_vect_mod.F90 | 279 ++++++++++++++++++----------
openacc/psb_d_oacc_vect_mod.F90 | 279 ++++++++++++++++++----------
openacc/psb_i_oacc_vect_mod.F90 | 54 +++---
openacc/psb_l_oacc_vect_mod.F90 | 54 +++---
openacc/psb_s_oacc_vect_mod.F90 | 279 ++++++++++++++++++----------
openacc/psb_z_oacc_vect_mod.F90 | 279 ++++++++++++++++++----------
14 files changed, 1082 insertions(+), 650 deletions(-)
diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90
index 66c4e865..a366543a 100644
--- a/openacc/impl/psb_c_oacc_mlt_v.f90
+++ b/openacc/impl/psb_c_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
-subroutine c_oacc_mlt_v(x, y, info)
- use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v
+subroutine psb_c_oacc_mlt_v(x, y, info)
+ use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v
implicit none
class(psb_c_base_vect_type), intent(inout) :: x
@@ -9,16 +9,19 @@ subroutine c_oacc_mlt_v(x, y, info)
integer(psb_ipk_) :: i, n
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
select type(xx => x)
class is (psb_c_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
+ call c_inner_oacc_mlt_v(n,xx%v, y%v)
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
call y%set_dev()
class default
if (xx%is_dev()) call xx%sync()
@@ -28,4 +31,16 @@ subroutine c_oacc_mlt_v(x, y, info)
end do
call y%set_host()
end select
-end subroutine c_oacc_mlt_v
+contains
+ subroutine c_inner_oacc_mlt_v(n,x, y)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_spk_), intent(inout) :: x(:), y(:)
+
+ integer(psb_ipk_) :: i
+ !$acc parallel loop present(x,y)
+ do i = 1, n
+ y(i) = (x(i)) * (y(i))
+ end do
+ end subroutine c_inner_oacc_mlt_v
+end subroutine psb_c_oacc_mlt_v
diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90
index a6bb6cc5..f7bceae7 100644
--- a/openacc/impl/psb_c_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
-subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_c_oacc_vect_mod, psb_protect_name => c_oacc_mlt_v_2
+subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2
use psb_string_mod
implicit none
complex(psb_spk_), intent(in) :: alpha, beta
@@ -25,33 +25,13 @@ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if ((beta /= czero) .and. (z%is_host())) call z%sync()
- if (conjgx_.and.conjgy_) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
-
- end if
+ call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
class default
if (xx%is_dev()) call xx%sync()
if (yy%is_dev()) call yy%sync()
if ((beta /= czero) .and. (z%is_dev())) call z%sync()
+ !call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
if (conjgx_.and.conjgy_) then
do i = 1, n
z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
@@ -67,7 +47,7 @@ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
else
do i = 1, n
z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
+ end do
end if
call z%set_host()
end select
@@ -75,24 +55,56 @@ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= czero) .and. (z%is_dev())) call z%sync()
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- end if
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
call z%set_host()
end select
-end subroutine c_oacc_mlt_v_2
+contains
+ subroutine c_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+complex(psb_spk_), intent(in) :: alpha, beta
+complex(psb_spk_), intent(inout) :: x(:), y(:), z(:)
+ integer(psb_ipk_), intent(out) :: info
+ logical, intent(in) :: conjgx, conjgy
+
+ integer(psb_ipk_) :: i
+ if (conjgx.and.conjgy) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i)
+ end do
+ else if (conjgx.and.(.not.conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i)
+ end do
+ else if ((.not.conjgx).and.(conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i)
+ end do
+ else
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ end if
+ end subroutine c_inner_oacc_mlt_v_2
+end subroutine psb_c_oacc_mlt_v_2
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
index bedd0247..dac62a65 100644
--- a/openacc/impl/psb_d_oacc_mlt_v.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
-subroutine d_oacc_mlt_v(x, y, info)
- use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v
+subroutine psb_d_oacc_mlt_v(x, y, info)
+ use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
@@ -9,16 +9,19 @@ subroutine d_oacc_mlt_v(x, y, info)
integer(psb_ipk_) :: i, n
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
select type(xx => x)
class is (psb_d_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
+ call d_inner_oacc_mlt_v(n,xx%v, y%v)
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
call y%set_dev()
class default
if (xx%is_dev()) call xx%sync()
@@ -28,4 +31,16 @@ subroutine d_oacc_mlt_v(x, y, info)
end do
call y%set_host()
end select
-end subroutine d_oacc_mlt_v
+contains
+ subroutine d_inner_oacc_mlt_v(n,x, y)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_), intent(inout) :: x(:), y(:)
+
+ integer(psb_ipk_) :: i
+ !$acc parallel loop present(x,y)
+ do i = 1, n
+ y(i) = (x(i)) * (y(i))
+ end do
+ end subroutine d_inner_oacc_mlt_v
+end subroutine psb_d_oacc_mlt_v
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
index e7dd604f..3f3a457d 100644
--- a/openacc/impl/psb_d_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
-subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2
+subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2
use psb_string_mod
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
@@ -25,33 +25,13 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if ((beta /= dzero) .and. (z%is_host())) call z%sync()
- if (conjgx_.and.conjgy_) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
-
- end if
+ call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
class default
if (xx%is_dev()) call xx%sync()
if (yy%is_dev()) call yy%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
+ !call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
if (conjgx_.and.conjgy_) then
do i = 1, n
z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
@@ -67,7 +47,7 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
else
do i = 1, n
z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
+ end do
end if
call z%set_host()
end select
@@ -75,24 +55,56 @@ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- end if
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
call z%set_host()
end select
-end subroutine d_oacc_mlt_v_2
+contains
+ subroutine d_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+real(psb_dpk_), intent(in) :: alpha, beta
+real(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
+ integer(psb_ipk_), intent(out) :: info
+ logical, intent(in) :: conjgx, conjgy
+
+ integer(psb_ipk_) :: i
+ if (conjgx.and.conjgy) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ else if (conjgx.and.(.not.conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ else if ((.not.conjgx).and.(conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ else
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ end if
+ end subroutine d_inner_oacc_mlt_v_2
+end subroutine psb_d_oacc_mlt_v_2
diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90
index fb043cf2..61a1d152 100644
--- a/openacc/impl/psb_s_oacc_mlt_v.f90
+++ b/openacc/impl/psb_s_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
-subroutine s_oacc_mlt_v(x, y, info)
- use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v
+subroutine psb_s_oacc_mlt_v(x, y, info)
+ use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v
implicit none
class(psb_s_base_vect_type), intent(inout) :: x
@@ -9,16 +9,19 @@ subroutine s_oacc_mlt_v(x, y, info)
integer(psb_ipk_) :: i, n
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
select type(xx => x)
class is (psb_s_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
+ call s_inner_oacc_mlt_v(n,xx%v, y%v)
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
call y%set_dev()
class default
if (xx%is_dev()) call xx%sync()
@@ -28,4 +31,16 @@ subroutine s_oacc_mlt_v(x, y, info)
end do
call y%set_host()
end select
-end subroutine s_oacc_mlt_v
+contains
+ subroutine s_inner_oacc_mlt_v(n,x, y)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_), intent(inout) :: x(:), y(:)
+
+ integer(psb_ipk_) :: i
+ !$acc parallel loop present(x,y)
+ do i = 1, n
+ y(i) = (x(i)) * (y(i))
+ end do
+ end subroutine s_inner_oacc_mlt_v
+end subroutine psb_s_oacc_mlt_v
diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90
index 04ee8e09..bcaebfbe 100644
--- a/openacc/impl/psb_s_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
-subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_s_oacc_vect_mod, psb_protect_name => s_oacc_mlt_v_2
+subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2
use psb_string_mod
implicit none
real(psb_spk_), intent(in) :: alpha, beta
@@ -25,33 +25,13 @@ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if ((beta /= szero) .and. (z%is_host())) call z%sync()
- if (conjgx_.and.conjgy_) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
-
- end if
+ call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
class default
if (xx%is_dev()) call xx%sync()
if (yy%is_dev()) call yy%sync()
if ((beta /= szero) .and. (z%is_dev())) call z%sync()
+ !call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
if (conjgx_.and.conjgy_) then
do i = 1, n
z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
@@ -67,7 +47,7 @@ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
else
do i = 1, n
z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
+ end do
end if
call z%set_host()
end select
@@ -75,24 +55,56 @@ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= szero) .and. (z%is_dev())) call z%sync()
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- end if
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
call z%set_host()
end select
-end subroutine s_oacc_mlt_v_2
+contains
+ subroutine s_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+real(psb_spk_), intent(in) :: alpha, beta
+real(psb_spk_), intent(inout) :: x(:), y(:), z(:)
+ integer(psb_ipk_), intent(out) :: info
+ logical, intent(in) :: conjgx, conjgy
+
+ integer(psb_ipk_) :: i
+ if (conjgx.and.conjgy) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ else if (conjgx.and.(.not.conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ else if ((.not.conjgx).and.(conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ else
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ end if
+ end subroutine s_inner_oacc_mlt_v_2
+end subroutine psb_s_oacc_mlt_v_2
diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90
index 7018f009..4bc582d2 100644
--- a/openacc/impl/psb_z_oacc_mlt_v.f90
+++ b/openacc/impl/psb_z_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
-subroutine z_oacc_mlt_v(x, y, info)
- use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v
+subroutine psb_z_oacc_mlt_v(x, y, info)
+ use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v
implicit none
class(psb_z_base_vect_type), intent(inout) :: x
@@ -9,16 +9,19 @@ subroutine z_oacc_mlt_v(x, y, info)
integer(psb_ipk_) :: i, n
+ info = 0
+ n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
select type(xx => x)
class is (psb_z_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
- !$acc parallel loop
- do i = 1, n
- y%v(i) = y%v(i) * xx%v(i)
- end do
+ call z_inner_oacc_mlt_v(n,xx%v, y%v)
+!!$ !$acc parallel loop
+!!$ do i = 1, n
+!!$ y%v(i) = y%v(i) * xx%v(i)
+!!$ end do
call y%set_dev()
class default
if (xx%is_dev()) call xx%sync()
@@ -28,4 +31,16 @@ subroutine z_oacc_mlt_v(x, y, info)
end do
call y%set_host()
end select
-end subroutine z_oacc_mlt_v
+contains
+ subroutine z_inner_oacc_mlt_v(n,x, y)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_dpk_), intent(inout) :: x(:), y(:)
+
+ integer(psb_ipk_) :: i
+ !$acc parallel loop present(x,y)
+ do i = 1, n
+ y(i) = (x(i)) * (y(i))
+ end do
+ end subroutine z_inner_oacc_mlt_v
+end subroutine psb_z_oacc_mlt_v
diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90
index dbc0929c..337a0a96 100644
--- a/openacc/impl/psb_z_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
-subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_z_oacc_vect_mod, psb_protect_name => z_oacc_mlt_v_2
+subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2
use psb_string_mod
implicit none
complex(psb_dpk_), intent(in) :: alpha, beta
@@ -25,33 +25,13 @@ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if ((beta /= zzero) .and. (z%is_host())) call z%sync()
- if (conjgx_.and.conjgy_) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else
- !$acc parallel loop
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
-
- end if
+ call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
class default
if (xx%is_dev()) call xx%sync()
if (yy%is_dev()) call yy%sync()
if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
+ !call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
if (conjgx_.and.conjgy_) then
do i = 1, n
z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
@@ -67,7 +47,7 @@ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
else
do i = 1, n
z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
+ end do
end if
call z%set_host()
end select
@@ -75,24 +55,56 @@ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
- end do
- end if
+ if (conjgx_.and.conjgy_) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else if (conjgx_.and.(.not.conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ else if ((.not.conjgx_).and.(conjgy_)) then
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
+ end do
+ else
+ do i = 1, n
+ z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
+ end do
+ end if
call z%set_host()
end select
-end subroutine z_oacc_mlt_v_2
+contains
+ subroutine z_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
+ implicit none
+ integer(psb_ipk_), intent(in) :: n
+complex(psb_dpk_), intent(in) :: alpha, beta
+complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
+ integer(psb_ipk_), intent(out) :: info
+ logical, intent(in) :: conjgx, conjgy
+
+ integer(psb_ipk_) :: i
+ if (conjgx.and.conjgy) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i)
+ end do
+ else if (conjgx.and.(.not.conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i)
+ end do
+ else if ((.not.conjgx).and.(conjgy)) then
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i)
+ end do
+ else
+ !$acc parallel loop present(x,y,z)
+ do i = 1, n
+ z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
+ end do
+ end if
+ end subroutine z_inner_oacc_mlt_v_2
+end subroutine psb_z_oacc_mlt_v_2
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index fc501e04..7362ba0e 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -1,5 +1,6 @@
module psb_c_oacc_vect_mod
use iso_c_binding
+ use openacc
use psb_const_mod
use psb_error_mod
use psb_c_vect_mod
@@ -50,8 +51,8 @@ module psb_c_oacc_vect_mod
procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz
procedure, pass(y) :: mlt_a => c_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => c_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => c_oacc_mlt_v_2
+ procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2
procedure, pass(x) :: scal => c_oacc_scal
procedure, pass(x) :: nrm2 => c_oacc_nrm2
procedure, pass(x) :: amax => c_oacc_amax
@@ -62,17 +63,17 @@ module psb_c_oacc_vect_mod
end type psb_c_vect_oacc
interface
- subroutine c_oacc_mlt_v(x, y, info)
+ subroutine psb_c_oacc_mlt_v(x, y, info)
import
implicit none
class(psb_c_base_vect_type), intent(inout) :: x
class(psb_c_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
- end subroutine c_oacc_mlt_v
+ end subroutine psb_c_oacc_mlt_v
end interface
interface
- subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
implicit none
complex(psb_spk_), intent(in) :: alpha, beta
@@ -81,7 +82,7 @@ module psb_c_oacc_vect_mod
class(psb_c_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
- end subroutine c_oacc_mlt_v_2
+ end subroutine psb_c_oacc_mlt_v_2
end interface
contains
@@ -89,15 +90,23 @@ contains
subroutine c_oacc_absval1(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: n, i
+ integer(psb_ipk_) :: n
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
n = size(x%v)
- !$acc parallel loop
- do i = 1, n
- x%v(i) = abs(x%v(i))
- end do
+ call c_inner_oacc_absval1(n,x%v)
call x%set_dev()
+ contains
+ subroutine c_inner_oacc_absval1(n,x)
+ implicit none
+ complex(psb_spk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ x(i) = abs(x(i))
+ end do
+ end subroutine c_inner_oacc_absval1
end subroutine c_oacc_absval1
subroutine c_oacc_absval2(x, y)
@@ -112,15 +121,23 @@ contains
class is (psb_c_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
- !$acc parallel loop
- do i = 1, n
- yy%v(i) = abs(x%v(i))
- end do
+ call c_inner_oacc_absval2(n,x%v,yy%v)
class default
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call x%psb_c_base_vect_type%absval(y)
end select
+ contains
+ subroutine c_inner_oacc_absval2(n,x,y)
+ implicit none
+ complex(psb_spk_), intent(inout) :: x(:),y(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = abs(x(i))
+ end do
+ end subroutine c_inner_oacc_absval2
end subroutine c_oacc_absval2
subroutine c_oacc_scal(alpha, x)
@@ -128,32 +145,46 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
complex(psb_spk_), intent(in) :: alpha
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- !$acc parallel loop
- do i = 1, size(x%v)
- x%v(i) = alpha * x%v(i)
- end do
+ if (x%is_host()) call x%sync()
+ call c_inner_oacc_scal(alpha, x%v)
call x%set_dev()
+ contains
+ subroutine c_inner_oacc_scal(alpha, x)
+ complex(psb_spk_), intent(in) :: alpha
+ complex(psb_spk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, size(x)
+ x(i) = alpha * x(i)
+ end do
+ end subroutine c_inner_oacc_scal
end subroutine c_oacc_scal
function c_oacc_nrm2(n, x) result(res)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_spk_) :: res
+ real(psb_spk_) :: res
+ real(psb_spk_) :: mx
integer(psb_ipk_) :: info
- real(psb_spk_) :: sum
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))**2
- end do
- res = sqrt(sum)
+ if (x%is_host()) call x%sync()
+ mx = c_oacc_amax(n,x)
+ res = c_inner_oacc_nrm2(n, mx, x%v)
+ contains
+ function c_inner_oacc_nrm2(n, mx,x) result(res)
+ integer(psb_ipk_) :: n
+ complex(psb_spk_) :: x(:)
+ real(psb_spk_) :: mx, res
+ real(psb_spk_) :: sum
+ integer(psb_ipk_) :: i
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end function c_inner_oacc_nrm2
end function c_oacc_nrm2
function c_oacc_amax(n, x) result(res)
@@ -162,18 +193,25 @@ contains
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
integer(psb_ipk_) :: info
- real(psb_spk_) :: max_val
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- max_val = -huge(0.0)
- !$acc parallel loop reduction(max:max_val)
- do i = 1, n
- if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
- end do
- res = max_val
+ if (x%is_host()) call x%sync()
+ res = c_inner_oacc_amax(n, x%v)
+ contains
+ function c_inner_oacc_amax(n, x) result(res)
+ integer(psb_ipk_) :: n
+ complex(psb_spk_) :: x(:)
+ real(psb_spk_) :: res
+ real(psb_spk_) :: max_val
+ integer(psb_ipk_) :: i
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x(i)) > max_val) max_val = abs(x(i))
+ end do
+ res = max_val
+ end function c_inner_oacc_amax
end function c_oacc_amax
-
+
function c_oacc_asum(n, x) result(res)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
@@ -182,14 +220,20 @@ contains
integer(psb_ipk_) :: info
complex(psb_spk_) :: sum
integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))
- end do
- res = sum
+ if (x%is_host()) call x%sync()
+ res = c_inner_oacc_asum(n, x%v)
+ contains
+ function c_inner_oacc_asum(n, x) result(res)
+ integer(psb_ipk_) :: n
+ complex(psb_spk_) :: x(:)
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: i
+ res = 0.0
+ !$acc parallel loop reduction(+:res)
+ do i = 1, n
+ res = res + abs(x(i))
+ end do
+ end function c_inner_oacc_asum
end function c_oacc_asum
@@ -201,7 +245,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
@@ -219,7 +263,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (z%is_dev()) call z%sync_space()
+ if (z%is_dev()) call z%sync()
!$acc parallel loop
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
@@ -282,18 +326,18 @@ contains
!!$ class is (psb_c_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_c_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ if (yy%is_host()) call yy%sync_space()
-!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync_space()
+!!$ if (xx%is_host()) call xx%sync()
+!!$ if (yy%is_host()) call yy%sync()
+!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
-!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
@@ -303,7 +347,7 @@ contains
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
@@ -327,23 +371,36 @@ contains
select type(xx => x)
type is (psb_c_vect_oacc)
- if ((beta /= czero) .and. y%is_host()) call y%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= czero) .and. y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(y%v)
if ((nx < m) .or. (ny < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
- do i = 1, m
- y%v(i) = alpha * xx%v(i) + beta * y%v(i)
- end do
+ call c_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info)
end if
call y%set_dev()
class default
if ((alpha /= czero) .and. (x%is_dev())) call x%sync()
call y%axpby(m, alpha, x%v, beta, info)
- end select
+ end select
+ contains
+ subroutine c_inner_oacc_axpby(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ complex(psb_spk_), intent(inout) :: x(:)
+ complex(psb_spk_), intent(inout) :: y(:)
+ complex(psb_spk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ !$acc parallel
+ !$acc loop
+ do i = 1, m
+ y(i) = alpha * x(i) + beta * y(i)
+ end do
+ !$acc end parallel
+ end subroutine c_inner_oacc_axpby
end subroutine c_oacc_axpby_v
subroutine c_oacc_axpby_a(m, alpha, x, beta, y, info)
@@ -356,7 +413,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
- if ((beta /= czero) .and. (y%is_dev())) call y%sync_space()
+ if ((beta /= czero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
@@ -375,7 +432,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
-
+ write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
@@ -385,9 +442,9 @@ contains
class is (psb_c_vect_oacc)
select type(zz => z)
class is (psb_c_vect_oacc)
- if ((beta /= czero) .and. yy%is_host()) call yy%sync_space()
- if ((delta /= czero) .and. zz%is_host()) call zz%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= czero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= czero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
@@ -432,8 +489,8 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
- if (y%is_host()) call y%sync_space()
+ if (ii%is_host()) call ii%sync()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -459,13 +516,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'c_oacc_sctb_x')
return
end select
- if (y%is_host()) call y%sync_space()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -486,7 +543,7 @@ contains
integer(psb_ipk_) :: i
if (n == 0) return
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -512,13 +569,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'c_oacc_gthzbuf')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -539,13 +596,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'c_oacc_gthzv_x')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -573,9 +630,9 @@ contains
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_c_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space()
- if (x%is_host()) call x%sync_space()
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
@@ -588,11 +645,11 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space()
+ if (virl%is_dev()) call virl%sync()
end select
select type(vval => val)
type is (psb_c_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
+ if (vval%is_dev()) call vval%sync()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
@@ -616,7 +673,7 @@ contains
integer(psb_ipk_) :: i
info = 0
- if (x%is_dev()) call x%sync_space()
+ if (x%is_dev()) call x%sync()
call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
@@ -635,7 +692,10 @@ contains
call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine c_oacc_bld_mn
@@ -657,7 +717,10 @@ contains
x%v(:) = this(:)
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine c_oacc_bld_x
@@ -676,13 +739,13 @@ contains
if (nd < n) then
call x%sync()
call x%psb_c_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_c_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
end if
@@ -740,10 +803,9 @@ contains
complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
res = czero
-
+ !write(0,*) 'dot_v'
select type(yy => y)
type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -751,18 +813,26 @@ contains
type is (psb_c_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
-
- !$acc parallel loop reduction(+:res) present(x%v, yy%v)
- do i = 1, n
- res = res + x%v(i) * yy%v(i)
- end do
- !$acc end parallel loop
-
+ res = c_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
end select
-
+ contains
+ function c_inner_oacc_dot(n, x, y) result(res)
+ implicit none
+ complex(psb_spk_), intent(in) :: x(:)
+ complex(psb_spk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_spk_) :: res
+ integer(psb_ipk_) :: i
+
+ !$acc parallel loop reduction(+:res) present(x, y)
+ do i = 1, n
+ res = res + x(i) * y(i)
+ end do
+ !$acc end parallel loop
+ end function c_inner_oacc_dot
end function c_oacc_vect_dot
function c_oacc_dot_a(n, x, y) result(res)
@@ -808,7 +878,7 @@ contains
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
- call c_oacc_create_dev(x%v)
+ if (.not.acc_is_present(x%v)) call c_oacc_create_dev(x%v)
end if
contains
subroutine c_oacc_create_dev(v)
@@ -886,6 +956,9 @@ contains
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
!$acc enter data create(x%v)
call x%sync_space()
end if
@@ -902,7 +975,9 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
deallocate(x%v, stat=info)
end if
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index bfb97b5c..9ecbccb4 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -1,5 +1,6 @@
module psb_d_oacc_vect_mod
use iso_c_binding
+ use openacc
use psb_const_mod
use psb_error_mod
use psb_d_vect_mod
@@ -50,8 +51,8 @@ module psb_d_oacc_vect_mod
procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => d_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
+ procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2
procedure, pass(x) :: scal => d_oacc_scal
procedure, pass(x) :: nrm2 => d_oacc_nrm2
procedure, pass(x) :: amax => d_oacc_amax
@@ -62,17 +63,17 @@ module psb_d_oacc_vect_mod
end type psb_d_vect_oacc
interface
- subroutine d_oacc_mlt_v(x, y, info)
+ subroutine psb_d_oacc_mlt_v(x, y, info)
import
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
- end subroutine d_oacc_mlt_v
+ end subroutine psb_d_oacc_mlt_v
end interface
interface
- subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
@@ -81,7 +82,7 @@ module psb_d_oacc_vect_mod
class(psb_d_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
- end subroutine d_oacc_mlt_v_2
+ end subroutine psb_d_oacc_mlt_v_2
end interface
contains
@@ -89,15 +90,23 @@ contains
subroutine d_oacc_absval1(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: n, i
+ integer(psb_ipk_) :: n
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
n = size(x%v)
- !$acc parallel loop
- do i = 1, n
- x%v(i) = abs(x%v(i))
- end do
+ call d_inner_oacc_absval1(n,x%v)
call x%set_dev()
+ contains
+ subroutine d_inner_oacc_absval1(n,x)
+ implicit none
+ real(psb_dpk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ x(i) = abs(x(i))
+ end do
+ end subroutine d_inner_oacc_absval1
end subroutine d_oacc_absval1
subroutine d_oacc_absval2(x, y)
@@ -112,15 +121,23 @@ contains
class is (psb_d_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
- !$acc parallel loop
- do i = 1, n
- yy%v(i) = abs(x%v(i))
- end do
+ call d_inner_oacc_absval2(n,x%v,yy%v)
class default
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call x%psb_d_base_vect_type%absval(y)
end select
+ contains
+ subroutine d_inner_oacc_absval2(n,x,y)
+ implicit none
+ real(psb_dpk_), intent(inout) :: x(:),y(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = abs(x(i))
+ end do
+ end subroutine d_inner_oacc_absval2
end subroutine d_oacc_absval2
subroutine d_oacc_scal(alpha, x)
@@ -128,32 +145,46 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
real(psb_dpk_), intent(in) :: alpha
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- !$acc parallel loop
- do i = 1, size(x%v)
- x%v(i) = alpha * x%v(i)
- end do
+ if (x%is_host()) call x%sync()
+ call d_inner_oacc_scal(alpha, x%v)
call x%set_dev()
+ contains
+ subroutine d_inner_oacc_scal(alpha, x)
+ real(psb_dpk_), intent(in) :: alpha
+ real(psb_dpk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, size(x)
+ x(i) = alpha * x(i)
+ end do
+ end subroutine d_inner_oacc_scal
end subroutine d_oacc_scal
function d_oacc_nrm2(n, x) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
+ real(psb_dpk_) :: res
+ real(psb_dpk_) :: mx
integer(psb_ipk_) :: info
- real(psb_dpk_) :: sum
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))**2
- end do
- res = sqrt(sum)
+ if (x%is_host()) call x%sync()
+ mx = d_oacc_amax(n,x)
+ res = d_inner_oacc_nrm2(n, mx, x%v)
+ contains
+ function d_inner_oacc_nrm2(n, mx,x) result(res)
+ integer(psb_ipk_) :: n
+ real(psb_dpk_) :: x(:)
+ real(psb_dpk_) :: mx, res
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end function d_inner_oacc_nrm2
end function d_oacc_nrm2
function d_oacc_amax(n, x) result(res)
@@ -162,18 +193,25 @@ contains
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
integer(psb_ipk_) :: info
- real(psb_dpk_) :: max_val
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- max_val = -huge(0.0)
- !$acc parallel loop reduction(max:max_val)
- do i = 1, n
- if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
- end do
- res = max_val
+ if (x%is_host()) call x%sync()
+ res = d_inner_oacc_amax(n, x%v)
+ contains
+ function d_inner_oacc_amax(n, x) result(res)
+ integer(psb_ipk_) :: n
+ real(psb_dpk_) :: x(:)
+ real(psb_dpk_) :: res
+ real(psb_dpk_) :: max_val
+ integer(psb_ipk_) :: i
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x(i)) > max_val) max_val = abs(x(i))
+ end do
+ res = max_val
+ end function d_inner_oacc_amax
end function d_oacc_amax
-
+
function d_oacc_asum(n, x) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -182,14 +220,20 @@ contains
integer(psb_ipk_) :: info
real(psb_dpk_) :: sum
integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))
- end do
- res = sum
+ if (x%is_host()) call x%sync()
+ res = d_inner_oacc_asum(n, x%v)
+ contains
+ function d_inner_oacc_asum(n, x) result(res)
+ integer(psb_ipk_) :: n
+ real(psb_dpk_) :: x(:)
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: i
+ res = 0.0
+ !$acc parallel loop reduction(+:res)
+ do i = 1, n
+ res = res + abs(x(i))
+ end do
+ end function d_inner_oacc_asum
end function d_oacc_asum
@@ -201,7 +245,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
@@ -219,7 +263,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (z%is_dev()) call z%sync_space()
+ if (z%is_dev()) call z%sync()
!$acc parallel loop
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
@@ -282,18 +326,18 @@ contains
!!$ class is (psb_d_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_d_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ if (yy%is_host()) call yy%sync_space()
-!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
+!!$ if (xx%is_host()) call xx%sync()
+!!$ if (yy%is_host()) call yy%sync()
+!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
-!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
@@ -303,7 +347,7 @@ contains
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
@@ -327,23 +371,36 @@ contains
select type(xx => x)
type is (psb_d_vect_oacc)
- if ((beta /= dzero) .and. y%is_host()) call y%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= dzero) .and. y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(y%v)
if ((nx < m) .or. (ny < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
- do i = 1, m
- y%v(i) = alpha * xx%v(i) + beta * y%v(i)
- end do
+ call d_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info)
end if
call y%set_dev()
class default
if ((alpha /= dzero) .and. (x%is_dev())) call x%sync()
call y%axpby(m, alpha, x%v, beta, info)
- end select
+ end select
+ contains
+ subroutine d_inner_oacc_axpby(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ real(psb_dpk_), intent(inout) :: x(:)
+ real(psb_dpk_), intent(inout) :: y(:)
+ real(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ !$acc parallel
+ !$acc loop
+ do i = 1, m
+ y(i) = alpha * x(i) + beta * y(i)
+ end do
+ !$acc end parallel
+ end subroutine d_inner_oacc_axpby
end subroutine d_oacc_axpby_v
subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
@@ -356,7 +413,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
- if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space()
+ if ((beta /= dzero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
@@ -375,7 +432,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
-
+ write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
@@ -385,9 +442,9 @@ contains
class is (psb_d_vect_oacc)
select type(zz => z)
class is (psb_d_vect_oacc)
- if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space()
- if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= dzero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= dzero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
@@ -432,8 +489,8 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
- if (y%is_host()) call y%sync_space()
+ if (ii%is_host()) call ii%sync()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -459,13 +516,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'd_oacc_sctb_x')
return
end select
- if (y%is_host()) call y%sync_space()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -486,7 +543,7 @@ contains
integer(psb_ipk_) :: i
if (n == 0) return
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -512,13 +569,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'd_oacc_gthzbuf')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -539,13 +596,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'd_oacc_gthzv_x')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -573,9 +630,9 @@ contains
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_d_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space()
- if (x%is_host()) call x%sync_space()
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
@@ -588,11 +645,11 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space()
+ if (virl%is_dev()) call virl%sync()
end select
select type(vval => val)
type is (psb_d_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
+ if (vval%is_dev()) call vval%sync()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
@@ -616,7 +673,7 @@ contains
integer(psb_ipk_) :: i
info = 0
- if (x%is_dev()) call x%sync_space()
+ if (x%is_dev()) call x%sync()
call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
@@ -635,7 +692,10 @@ contains
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine d_oacc_bld_mn
@@ -657,7 +717,10 @@ contains
x%v(:) = this(:)
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine d_oacc_bld_x
@@ -676,13 +739,13 @@ contains
if (nd < n) then
call x%sync()
call x%psb_d_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_d_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
end if
@@ -740,10 +803,9 @@ contains
real(psb_dpk_) :: res
real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
res = dzero
-
+ !write(0,*) 'dot_v'
select type(yy => y)
type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -751,18 +813,26 @@ contains
type is (psb_d_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
-
- !$acc parallel loop reduction(+:res) present(x%v, yy%v)
- do i = 1, n
- res = res + x%v(i) * yy%v(i)
- end do
- !$acc end parallel loop
-
+ res = d_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
end select
-
+ contains
+ function d_inner_oacc_dot(n, x, y) result(res)
+ implicit none
+ real(psb_dpk_), intent(in) :: x(:)
+ real(psb_dpk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: i
+
+ !$acc parallel loop reduction(+:res) present(x, y)
+ do i = 1, n
+ res = res + x(i) * y(i)
+ end do
+ !$acc end parallel loop
+ end function d_inner_oacc_dot
end function d_oacc_vect_dot
function d_oacc_dot_a(n, x, y) result(res)
@@ -808,7 +878,7 @@ contains
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
- call d_oacc_create_dev(x%v)
+ if (.not.acc_is_present(x%v)) call d_oacc_create_dev(x%v)
end if
contains
subroutine d_oacc_create_dev(v)
@@ -886,6 +956,9 @@ contains
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
!$acc enter data create(x%v)
call x%sync_space()
end if
@@ -902,7 +975,9 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
deallocate(x%v, stat=info)
end if
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 72e9ada2..3dbc48f1 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -1,5 +1,6 @@
module psb_i_oacc_vect_mod
use iso_c_binding
+ use openacc
use psb_const_mod
use psb_error_mod
use psb_i_vect_mod
@@ -64,8 +65,8 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
- if (y%is_host()) call y%sync_space()
+ if (ii%is_host()) call ii%sync()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -91,13 +92,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'i_oacc_sctb_x')
return
end select
- if (y%is_host()) call y%sync_space()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -118,7 +119,7 @@ contains
integer(psb_ipk_) :: i
if (n == 0) return
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -144,13 +145,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'i_oacc_gthzbuf')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -171,13 +172,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'i_oacc_gthzv_x')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -205,9 +206,9 @@ contains
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_i_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space()
- if (x%is_host()) call x%sync_space()
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
@@ -220,11 +221,11 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space()
+ if (virl%is_dev()) call virl%sync()
end select
select type(vval => val)
type is (psb_i_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
+ if (vval%is_dev()) call vval%sync()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
@@ -248,7 +249,7 @@ contains
integer(psb_ipk_) :: i
info = 0
- if (x%is_dev()) call x%sync_space()
+ if (x%is_dev()) call x%sync()
call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
@@ -267,7 +268,10 @@ contains
call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine i_oacc_bld_mn
@@ -289,7 +293,10 @@ contains
x%v(:) = this(:)
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine i_oacc_bld_x
@@ -308,13 +315,13 @@ contains
if (nd < n) then
call x%sync()
call x%psb_i_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_i_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
end if
@@ -393,7 +400,7 @@ contains
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
- call i_oacc_create_dev(x%v)
+ if (.not.acc_is_present(x%v)) call i_oacc_create_dev(x%v)
end if
contains
subroutine i_oacc_create_dev(v)
@@ -471,6 +478,9 @@ contains
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
!$acc enter data create(x%v)
call x%sync_space()
end if
@@ -487,7 +497,9 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
deallocate(x%v, stat=info)
end if
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index aeba4537..cdf28366 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -1,5 +1,6 @@
module psb_l_oacc_vect_mod
use iso_c_binding
+ use openacc
use psb_const_mod
use psb_error_mod
use psb_l_vect_mod
@@ -66,8 +67,8 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
- if (y%is_host()) call y%sync_space()
+ if (ii%is_host()) call ii%sync()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -93,13 +94,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'l_oacc_sctb_x')
return
end select
- if (y%is_host()) call y%sync_space()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -120,7 +121,7 @@ contains
integer(psb_ipk_) :: i
if (n == 0) return
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -146,13 +147,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'l_oacc_gthzbuf')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -173,13 +174,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'l_oacc_gthzv_x')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -207,9 +208,9 @@ contains
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_l_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space()
- if (x%is_host()) call x%sync_space()
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
@@ -222,11 +223,11 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space()
+ if (virl%is_dev()) call virl%sync()
end select
select type(vval => val)
type is (psb_l_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
+ if (vval%is_dev()) call vval%sync()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
@@ -250,7 +251,7 @@ contains
integer(psb_ipk_) :: i
info = 0
- if (x%is_dev()) call x%sync_space()
+ if (x%is_dev()) call x%sync()
call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
@@ -269,7 +270,10 @@ contains
call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine l_oacc_bld_mn
@@ -291,7 +295,10 @@ contains
x%v(:) = this(:)
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine l_oacc_bld_x
@@ -310,13 +317,13 @@ contains
if (nd < n) then
call x%sync()
call x%psb_l_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_l_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
end if
@@ -395,7 +402,7 @@ contains
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
- call l_oacc_create_dev(x%v)
+ if (.not.acc_is_present(x%v)) call l_oacc_create_dev(x%v)
end if
contains
subroutine l_oacc_create_dev(v)
@@ -473,6 +480,9 @@ contains
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
!$acc enter data create(x%v)
call x%sync_space()
end if
@@ -489,7 +499,9 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
deallocate(x%v, stat=info)
end if
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 47922d6a..c3b31af7 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -1,5 +1,6 @@
module psb_s_oacc_vect_mod
use iso_c_binding
+ use openacc
use psb_const_mod
use psb_error_mod
use psb_s_vect_mod
@@ -50,8 +51,8 @@ module psb_s_oacc_vect_mod
procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz
procedure, pass(y) :: mlt_a => s_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => s_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => s_oacc_mlt_v_2
+ procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2
procedure, pass(x) :: scal => s_oacc_scal
procedure, pass(x) :: nrm2 => s_oacc_nrm2
procedure, pass(x) :: amax => s_oacc_amax
@@ -62,17 +63,17 @@ module psb_s_oacc_vect_mod
end type psb_s_vect_oacc
interface
- subroutine s_oacc_mlt_v(x, y, info)
+ subroutine psb_s_oacc_mlt_v(x, y, info)
import
implicit none
class(psb_s_base_vect_type), intent(inout) :: x
class(psb_s_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
- end subroutine s_oacc_mlt_v
+ end subroutine psb_s_oacc_mlt_v
end interface
interface
- subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
implicit none
real(psb_spk_), intent(in) :: alpha, beta
@@ -81,7 +82,7 @@ module psb_s_oacc_vect_mod
class(psb_s_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
- end subroutine s_oacc_mlt_v_2
+ end subroutine psb_s_oacc_mlt_v_2
end interface
contains
@@ -89,15 +90,23 @@ contains
subroutine s_oacc_absval1(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: n, i
+ integer(psb_ipk_) :: n
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
n = size(x%v)
- !$acc parallel loop
- do i = 1, n
- x%v(i) = abs(x%v(i))
- end do
+ call s_inner_oacc_absval1(n,x%v)
call x%set_dev()
+ contains
+ subroutine s_inner_oacc_absval1(n,x)
+ implicit none
+ real(psb_spk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ x(i) = abs(x(i))
+ end do
+ end subroutine s_inner_oacc_absval1
end subroutine s_oacc_absval1
subroutine s_oacc_absval2(x, y)
@@ -112,15 +121,23 @@ contains
class is (psb_s_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
- !$acc parallel loop
- do i = 1, n
- yy%v(i) = abs(x%v(i))
- end do
+ call s_inner_oacc_absval2(n,x%v,yy%v)
class default
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call x%psb_s_base_vect_type%absval(y)
end select
+ contains
+ subroutine s_inner_oacc_absval2(n,x,y)
+ implicit none
+ real(psb_spk_), intent(inout) :: x(:),y(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = abs(x(i))
+ end do
+ end subroutine s_inner_oacc_absval2
end subroutine s_oacc_absval2
subroutine s_oacc_scal(alpha, x)
@@ -128,32 +145,46 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
real(psb_spk_), intent(in) :: alpha
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- !$acc parallel loop
- do i = 1, size(x%v)
- x%v(i) = alpha * x%v(i)
- end do
+ if (x%is_host()) call x%sync()
+ call s_inner_oacc_scal(alpha, x%v)
call x%set_dev()
+ contains
+ subroutine s_inner_oacc_scal(alpha, x)
+ real(psb_spk_), intent(in) :: alpha
+ real(psb_spk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, size(x)
+ x(i) = alpha * x(i)
+ end do
+ end subroutine s_inner_oacc_scal
end subroutine s_oacc_scal
function s_oacc_nrm2(n, x) result(res)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_spk_) :: res
+ real(psb_spk_) :: res
+ real(psb_spk_) :: mx
integer(psb_ipk_) :: info
- real(psb_spk_) :: sum
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))**2
- end do
- res = sqrt(sum)
+ if (x%is_host()) call x%sync()
+ mx = s_oacc_amax(n,x)
+ res = s_inner_oacc_nrm2(n, mx, x%v)
+ contains
+ function s_inner_oacc_nrm2(n, mx,x) result(res)
+ integer(psb_ipk_) :: n
+ real(psb_spk_) :: x(:)
+ real(psb_spk_) :: mx, res
+ real(psb_spk_) :: sum
+ integer(psb_ipk_) :: i
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end function s_inner_oacc_nrm2
end function s_oacc_nrm2
function s_oacc_amax(n, x) result(res)
@@ -162,18 +193,25 @@ contains
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
integer(psb_ipk_) :: info
- real(psb_spk_) :: max_val
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- max_val = -huge(0.0)
- !$acc parallel loop reduction(max:max_val)
- do i = 1, n
- if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
- end do
- res = max_val
+ if (x%is_host()) call x%sync()
+ res = s_inner_oacc_amax(n, x%v)
+ contains
+ function s_inner_oacc_amax(n, x) result(res)
+ integer(psb_ipk_) :: n
+ real(psb_spk_) :: x(:)
+ real(psb_spk_) :: res
+ real(psb_spk_) :: max_val
+ integer(psb_ipk_) :: i
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x(i)) > max_val) max_val = abs(x(i))
+ end do
+ res = max_val
+ end function s_inner_oacc_amax
end function s_oacc_amax
-
+
function s_oacc_asum(n, x) result(res)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
@@ -182,14 +220,20 @@ contains
integer(psb_ipk_) :: info
real(psb_spk_) :: sum
integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))
- end do
- res = sum
+ if (x%is_host()) call x%sync()
+ res = s_inner_oacc_asum(n, x%v)
+ contains
+ function s_inner_oacc_asum(n, x) result(res)
+ integer(psb_ipk_) :: n
+ real(psb_spk_) :: x(:)
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: i
+ res = 0.0
+ !$acc parallel loop reduction(+:res)
+ do i = 1, n
+ res = res + abs(x(i))
+ end do
+ end function s_inner_oacc_asum
end function s_oacc_asum
@@ -201,7 +245,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
@@ -219,7 +263,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (z%is_dev()) call z%sync_space()
+ if (z%is_dev()) call z%sync()
!$acc parallel loop
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
@@ -282,18 +326,18 @@ contains
!!$ class is (psb_s_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_s_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ if (yy%is_host()) call yy%sync_space()
-!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync_space()
+!!$ if (xx%is_host()) call xx%sync()
+!!$ if (yy%is_host()) call yy%sync()
+!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
-!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
@@ -303,7 +347,7 @@ contains
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
@@ -327,23 +371,36 @@ contains
select type(xx => x)
type is (psb_s_vect_oacc)
- if ((beta /= szero) .and. y%is_host()) call y%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= szero) .and. y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(y%v)
if ((nx < m) .or. (ny < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
- do i = 1, m
- y%v(i) = alpha * xx%v(i) + beta * y%v(i)
- end do
+ call s_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info)
end if
call y%set_dev()
class default
if ((alpha /= szero) .and. (x%is_dev())) call x%sync()
call y%axpby(m, alpha, x%v, beta, info)
- end select
+ end select
+ contains
+ subroutine s_inner_oacc_axpby(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ real(psb_spk_), intent(inout) :: x(:)
+ real(psb_spk_), intent(inout) :: y(:)
+ real(psb_spk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ !$acc parallel
+ !$acc loop
+ do i = 1, m
+ y(i) = alpha * x(i) + beta * y(i)
+ end do
+ !$acc end parallel
+ end subroutine s_inner_oacc_axpby
end subroutine s_oacc_axpby_v
subroutine s_oacc_axpby_a(m, alpha, x, beta, y, info)
@@ -356,7 +413,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
- if ((beta /= szero) .and. (y%is_dev())) call y%sync_space()
+ if ((beta /= szero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
@@ -375,7 +432,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
-
+ write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
@@ -385,9 +442,9 @@ contains
class is (psb_s_vect_oacc)
select type(zz => z)
class is (psb_s_vect_oacc)
- if ((beta /= szero) .and. yy%is_host()) call yy%sync_space()
- if ((delta /= szero) .and. zz%is_host()) call zz%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= szero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= szero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
@@ -432,8 +489,8 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
- if (y%is_host()) call y%sync_space()
+ if (ii%is_host()) call ii%sync()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -459,13 +516,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 's_oacc_sctb_x')
return
end select
- if (y%is_host()) call y%sync_space()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -486,7 +543,7 @@ contains
integer(psb_ipk_) :: i
if (n == 0) return
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -512,13 +569,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 's_oacc_gthzbuf')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -539,13 +596,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 's_oacc_gthzv_x')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -573,9 +630,9 @@ contains
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_s_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space()
- if (x%is_host()) call x%sync_space()
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
@@ -588,11 +645,11 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space()
+ if (virl%is_dev()) call virl%sync()
end select
select type(vval => val)
type is (psb_s_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
+ if (vval%is_dev()) call vval%sync()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
@@ -616,7 +673,7 @@ contains
integer(psb_ipk_) :: i
info = 0
- if (x%is_dev()) call x%sync_space()
+ if (x%is_dev()) call x%sync()
call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
@@ -635,7 +692,10 @@ contains
call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine s_oacc_bld_mn
@@ -657,7 +717,10 @@ contains
x%v(:) = this(:)
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine s_oacc_bld_x
@@ -676,13 +739,13 @@ contains
if (nd < n) then
call x%sync()
call x%psb_s_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_s_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
end if
@@ -740,10 +803,9 @@ contains
real(psb_spk_) :: res
real(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
res = szero
-
+ !write(0,*) 'dot_v'
select type(yy => y)
type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -751,18 +813,26 @@ contains
type is (psb_s_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
-
- !$acc parallel loop reduction(+:res) present(x%v, yy%v)
- do i = 1, n
- res = res + x%v(i) * yy%v(i)
- end do
- !$acc end parallel loop
-
+ res = s_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
end select
-
+ contains
+ function s_inner_oacc_dot(n, x, y) result(res)
+ implicit none
+ real(psb_spk_), intent(in) :: x(:)
+ real(psb_spk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ real(psb_spk_) :: res
+ integer(psb_ipk_) :: i
+
+ !$acc parallel loop reduction(+:res) present(x, y)
+ do i = 1, n
+ res = res + x(i) * y(i)
+ end do
+ !$acc end parallel loop
+ end function s_inner_oacc_dot
end function s_oacc_vect_dot
function s_oacc_dot_a(n, x, y) result(res)
@@ -808,7 +878,7 @@ contains
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
- call s_oacc_create_dev(x%v)
+ if (.not.acc_is_present(x%v)) call s_oacc_create_dev(x%v)
end if
contains
subroutine s_oacc_create_dev(v)
@@ -886,6 +956,9 @@ contains
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
!$acc enter data create(x%v)
call x%sync_space()
end if
@@ -902,7 +975,9 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
deallocate(x%v, stat=info)
end if
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index be03b1cd..bab1a0a0 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -1,5 +1,6 @@
module psb_z_oacc_vect_mod
use iso_c_binding
+ use openacc
use psb_const_mod
use psb_error_mod
use psb_z_vect_mod
@@ -50,8 +51,8 @@ module psb_z_oacc_vect_mod
procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz
procedure, pass(y) :: mlt_a => z_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => z_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => z_oacc_mlt_v_2
+ procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2
procedure, pass(x) :: scal => z_oacc_scal
procedure, pass(x) :: nrm2 => z_oacc_nrm2
procedure, pass(x) :: amax => z_oacc_amax
@@ -62,17 +63,17 @@ module psb_z_oacc_vect_mod
end type psb_z_vect_oacc
interface
- subroutine z_oacc_mlt_v(x, y, info)
+ subroutine psb_z_oacc_mlt_v(x, y, info)
import
implicit none
class(psb_z_base_vect_type), intent(inout) :: x
class(psb_z_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
- end subroutine z_oacc_mlt_v
+ end subroutine psb_z_oacc_mlt_v
end interface
interface
- subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
+ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
implicit none
complex(psb_dpk_), intent(in) :: alpha, beta
@@ -81,7 +82,7 @@ module psb_z_oacc_vect_mod
class(psb_z_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
- end subroutine z_oacc_mlt_v_2
+ end subroutine psb_z_oacc_mlt_v_2
end interface
contains
@@ -89,15 +90,23 @@ contains
subroutine z_oacc_absval1(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
- integer(psb_ipk_) :: n, i
+ integer(psb_ipk_) :: n
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
n = size(x%v)
- !$acc parallel loop
- do i = 1, n
- x%v(i) = abs(x%v(i))
- end do
+ call z_inner_oacc_absval1(n,x%v)
call x%set_dev()
+ contains
+ subroutine z_inner_oacc_absval1(n,x)
+ implicit none
+ complex(psb_dpk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ x(i) = abs(x(i))
+ end do
+ end subroutine z_inner_oacc_absval1
end subroutine z_oacc_absval1
subroutine z_oacc_absval2(x, y)
@@ -112,15 +121,23 @@ contains
class is (psb_z_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
- !$acc parallel loop
- do i = 1, n
- yy%v(i) = abs(x%v(i))
- end do
+ call z_inner_oacc_absval2(n,x%v,yy%v)
class default
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call x%psb_z_base_vect_type%absval(y)
end select
+ contains
+ subroutine z_inner_oacc_absval2(n,x,y)
+ implicit none
+ complex(psb_dpk_), intent(inout) :: x(:),y(:)
+ integer(psb_ipk_) :: n
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, n
+ y(i) = abs(x(i))
+ end do
+ end subroutine z_inner_oacc_absval2
end subroutine z_oacc_absval2
subroutine z_oacc_scal(alpha, x)
@@ -128,32 +145,46 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
complex(psb_dpk_), intent(in) :: alpha
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- !$acc parallel loop
- do i = 1, size(x%v)
- x%v(i) = alpha * x%v(i)
- end do
+ if (x%is_host()) call x%sync()
+ call z_inner_oacc_scal(alpha, x%v)
call x%set_dev()
+ contains
+ subroutine z_inner_oacc_scal(alpha, x)
+ complex(psb_dpk_), intent(in) :: alpha
+ complex(psb_dpk_), intent(inout) :: x(:)
+ integer(psb_ipk_) :: i
+ !$acc parallel loop
+ do i = 1, size(x)
+ x(i) = alpha * x(i)
+ end do
+ end subroutine z_inner_oacc_scal
end subroutine z_oacc_scal
function z_oacc_nrm2(n, x) result(res)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
+ real(psb_dpk_) :: res
+ real(psb_dpk_) :: mx
integer(psb_ipk_) :: info
- real(psb_dpk_) :: sum
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))**2
- end do
- res = sqrt(sum)
+ if (x%is_host()) call x%sync()
+ mx = z_oacc_amax(n,x)
+ res = z_inner_oacc_nrm2(n, mx, x%v)
+ contains
+ function z_inner_oacc_nrm2(n, mx,x) result(res)
+ integer(psb_ipk_) :: n
+ complex(psb_dpk_) :: x(:)
+ real(psb_dpk_) :: mx, res
+ real(psb_dpk_) :: sum
+ integer(psb_ipk_) :: i
+ sum = 0.0
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end function z_inner_oacc_nrm2
end function z_oacc_nrm2
function z_oacc_amax(n, x) result(res)
@@ -162,18 +193,25 @@ contains
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
integer(psb_ipk_) :: info
- real(psb_dpk_) :: max_val
- integer(psb_ipk_) :: i
- if (x%is_host()) call x%sync_space()
- max_val = -huge(0.0)
- !$acc parallel loop reduction(max:max_val)
- do i = 1, n
- if (abs(x%v(i)) > max_val) max_val = abs(x%v(i))
- end do
- res = max_val
+ if (x%is_host()) call x%sync()
+ res = z_inner_oacc_amax(n, x%v)
+ contains
+ function z_inner_oacc_amax(n, x) result(res)
+ integer(psb_ipk_) :: n
+ complex(psb_dpk_) :: x(:)
+ real(psb_dpk_) :: res
+ real(psb_dpk_) :: max_val
+ integer(psb_ipk_) :: i
+ max_val = -huge(0.0)
+ !$acc parallel loop reduction(max:max_val)
+ do i = 1, n
+ if (abs(x(i)) > max_val) max_val = abs(x(i))
+ end do
+ res = max_val
+ end function z_inner_oacc_amax
end function z_oacc_amax
-
+
function z_oacc_asum(n, x) result(res)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
@@ -182,14 +220,20 @@ contains
integer(psb_ipk_) :: info
complex(psb_dpk_) :: sum
integer(psb_ipk_) :: i
-
- if (x%is_host()) call x%sync_space()
- sum = 0.0
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x%v(i))
- end do
- res = sum
+ if (x%is_host()) call x%sync()
+ res = z_inner_oacc_asum(n, x%v)
+ contains
+ function z_inner_oacc_asum(n, x) result(res)
+ integer(psb_ipk_) :: n
+ complex(psb_dpk_) :: x(:)
+ real(psb_dpk_) :: res
+ integer(psb_ipk_) :: i
+ res = 0.0
+ !$acc parallel loop reduction(+:res)
+ do i = 1, n
+ res = res + abs(x(i))
+ end do
+ end function z_inner_oacc_asum
end function z_oacc_asum
@@ -201,7 +245,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
@@ -219,7 +263,7 @@ contains
integer(psb_ipk_) :: i, n
info = 0
- if (z%is_dev()) call z%sync_space()
+ if (z%is_dev()) call z%sync()
!$acc parallel loop
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
@@ -282,18 +326,18 @@ contains
!!$ class is (psb_z_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_z_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync_space()
-!!$ if (yy%is_host()) call yy%sync_space()
-!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync_space()
+!!$ if (xx%is_host()) call xx%sync()
+!!$ if (yy%is_host()) call yy%sync()
+!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
-!!$ if (xx%is_dev()) call xx%sync_space()
+!!$ if (xx%is_dev()) call xx%sync()
!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
@@ -303,7 +347,7 @@ contains
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync_space()
+!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
@@ -327,23 +371,36 @@ contains
select type(xx => x)
type is (psb_z_vect_oacc)
- if ((beta /= zzero) .and. y%is_host()) call y%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= zzero) .and. y%is_host()) call y%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(y%v)
if ((nx < m) .or. (ny < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
- do i = 1, m
- y%v(i) = alpha * xx%v(i) + beta * y%v(i)
- end do
+ call z_inner_oacc_axpby(m, alpha, x%v, beta, y%v, info)
end if
call y%set_dev()
class default
if ((alpha /= zzero) .and. (x%is_dev())) call x%sync()
call y%axpby(m, alpha, x%v, beta, info)
- end select
+ end select
+ contains
+ subroutine z_inner_oacc_axpby(m, alpha, x, beta, y, info)
+ !use psi_serial_mod
+ implicit none
+ integer(psb_ipk_), intent(in) :: m
+ complex(psb_dpk_), intent(inout) :: x(:)
+ complex(psb_dpk_), intent(inout) :: y(:)
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ integer(psb_ipk_), intent(out) :: info
+ !$acc parallel
+ !$acc loop
+ do i = 1, m
+ y(i) = alpha * x(i) + beta * y(i)
+ end do
+ !$acc end parallel
+ end subroutine z_inner_oacc_axpby
end subroutine z_oacc_axpby_v
subroutine z_oacc_axpby_a(m, alpha, x, beta, y, info)
@@ -356,7 +413,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i
- if ((beta /= zzero) .and. (y%is_dev())) call y%sync_space()
+ if ((beta /= zzero) .and. (y%is_dev())) call y%sync()
!$acc parallel loop
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
@@ -375,7 +432,7 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
-
+ write(0,*)'upd_xyz'
info = psb_success_
gpu_done = .false.
@@ -385,9 +442,9 @@ contains
class is (psb_z_vect_oacc)
select type(zz => z)
class is (psb_z_vect_oacc)
- if ((beta /= zzero) .and. yy%is_host()) call yy%sync_space()
- if ((delta /= zzero) .and. zz%is_host()) call zz%sync_space()
- if (xx%is_host()) call xx%sync_space()
+ if ((beta /= zzero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= zzero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
nx = size(xx%v)
ny = size(yy%v)
nz = size(zz%v)
@@ -432,8 +489,8 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
- if (y%is_host()) call y%sync_space()
+ if (ii%is_host()) call ii%sync()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -459,13 +516,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'z_oacc_sctb_x')
return
end select
- if (y%is_host()) call y%sync_space()
+ if (y%is_host()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -486,7 +543,7 @@ contains
integer(psb_ipk_) :: i
if (n == 0) return
- if (y%is_dev()) call y%sync_space()
+ if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
@@ -512,13 +569,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'z_oacc_gthzbuf')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -539,13 +596,13 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
- if (ii%is_host()) call ii%sync_space()
+ if (ii%is_host()) call ii%sync()
class default
call psb_errpush(info, 'z_oacc_gthzv_x')
return
end select
- if (x%is_host()) call x%sync_space()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
@@ -573,9 +630,9 @@ contains
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_z_vect_oacc)
- if (vval%is_host()) call vval%sync_space()
- if (virl%is_host()) call virl%sync_space()
- if (x%is_host()) call x%sync_space()
+ if (vval%is_host()) call vval%sync()
+ if (virl%is_host()) call virl%sync()
+ if (x%is_host()) call x%sync()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
@@ -588,11 +645,11 @@ contains
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
- if (virl%is_dev()) call virl%sync_space()
+ if (virl%is_dev()) call virl%sync()
end select
select type(vval => val)
type is (psb_z_vect_oacc)
- if (vval%is_dev()) call vval%sync_space()
+ if (vval%is_dev()) call vval%sync()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
@@ -616,7 +673,7 @@ contains
integer(psb_ipk_) :: i
info = 0
- if (x%is_dev()) call x%sync_space()
+ if (x%is_dev()) call x%sync()
call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
!$acc update device(x%v)
@@ -635,7 +692,10 @@ contains
call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine z_oacc_bld_mn
@@ -657,7 +717,10 @@ contains
x%v(:) = this(:)
call x%set_host()
- !$acc update device(x%v)
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
+ !$acc enter data copyin(x%v)
end subroutine z_oacc_bld_x
@@ -676,13 +739,13 @@ contains
if (nd < n) then
call x%sync()
call x%psb_z_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_z_base_vect_type%asb(n, info)
- if (info == psb_success_) call x%sync_space()
+ if (info == psb_success_) call x%sync()
call x%set_host()
end if
end if
@@ -740,10 +803,9 @@ contains
complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
- integer(psb_ipk_) :: i
res = zzero
-
+ !write(0,*) 'dot_v'
select type(yy => y)
type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -751,18 +813,26 @@ contains
type is (psb_z_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
-
- !$acc parallel loop reduction(+:res) present(x%v, yy%v)
- do i = 1, n
- res = res + x%v(i) * yy%v(i)
- end do
- !$acc end parallel loop
-
+ res = z_inner_oacc_dot(n, x%v, yy%v)
class default
call x%sync()
res = y%dot(n, x%v)
end select
-
+ contains
+ function z_inner_oacc_dot(n, x, y) result(res)
+ implicit none
+ complex(psb_dpk_), intent(in) :: x(:)
+ complex(psb_dpk_), intent(in) :: y(:)
+ integer(psb_ipk_), intent(in) :: n
+ complex(psb_dpk_) :: res
+ integer(psb_ipk_) :: i
+
+ !$acc parallel loop reduction(+:res) present(x, y)
+ do i = 1, n
+ res = res + x(i) * y(i)
+ end do
+ !$acc end parallel loop
+ end function z_inner_oacc_dot
end function z_oacc_vect_dot
function z_oacc_dot_a(n, x, y) result(res)
@@ -808,7 +878,7 @@ contains
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (allocated(x%v)) then
- call z_oacc_create_dev(x%v)
+ if (.not.acc_is_present(x%v)) call z_oacc_create_dev(x%v)
end if
contains
subroutine z_oacc_create_dev(v)
@@ -886,6 +956,9 @@ contains
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
!$acc enter data create(x%v)
call x%sync_space()
end if
@@ -902,7 +975,9 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- !$acc exit data delete(x%v) finalize
+ if (acc_is_present(x%v)) then
+ !$acc exit data delete(x%v) finalize
+ end if
deallocate(x%v, stat=info)
end if
From 95c546aaddeb4280de1b768d7de4f12accc1c336 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Mon, 26 Aug 2024 08:22:35 +0200
Subject: [PATCH 33/86] Fix OpenACC version of ELL vect_mv
---
openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 13 +++++++------
openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 13 +++++++------
openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 13 +++++++------
openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 13 +++++++------
4 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
index e65d00ba..f3e78d98 100644
--- a/openacc/impl/psb_c_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
@@ -10,13 +10,13 @@ contains
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nzt, nc
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
-
+ nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
@@ -27,14 +27,15 @@ contains
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
- call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+
call y%set_dev()
contains
- subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
implicit none
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nc
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
@@ -52,7 +53,7 @@ contains
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
- do j = 1, nzt
+ do j = 1, nc
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
index ebcdb405..4239d049 100644
--- a/openacc/impl/psb_d_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
@@ -10,13 +10,13 @@ contains
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nzt, nc
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
-
+ nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
@@ -27,14 +27,15 @@ contains
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
- call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+
call y%set_dev()
contains
- subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
implicit none
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nc
real(psb_dpk_), intent(in) :: alpha, beta
real(psb_dpk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
@@ -52,7 +53,7 @@ contains
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
- do j = 1, nzt
+ do j = 1, nc
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
index f48ba041..bbbdd6a3 100644
--- a/openacc/impl/psb_s_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
@@ -10,13 +10,13 @@ contains
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nzt, nc
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
-
+ nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
@@ -27,14 +27,15 @@ contains
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
- call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+
call y%set_dev()
contains
- subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
implicit none
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nc
real(psb_spk_), intent(in) :: alpha, beta
real(psb_spk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
@@ -52,7 +53,7 @@ contains
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
- do j = 1, nzt
+ do j = 1, nc
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
index ecb61adf..ad8460b8 100644
--- a/openacc/impl/psb_z_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
@@ -10,13 +10,13 @@ contains
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nzt, nc
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
-
+ nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
@@ -27,14 +27,15 @@ contains
if (x%is_host()) call x%sync()
if (y%is_host()) call y%sync()
- call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+
call y%set_dev()
contains
- subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info)
+ subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
implicit none
- integer(psb_ipk_) :: m, n, nzt
+ integer(psb_ipk_) :: m, n, nc
complex(psb_dpk_), intent(in) :: alpha, beta
complex(psb_dpk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
@@ -52,7 +53,7 @@ contains
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
- do j = 1, nzt
+ do j = 1, nc
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
From 479135c62d99319b3c6c1123b45302976f4cceb2 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 29 Aug 2024 16:36:24 +0200
Subject: [PATCH 34/86] Merge some changes from V4
---
openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 | 7 +-
openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 | 6 +-
openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 61 +----
openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 8 +-
openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 72 +-----
openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 6 +-
openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 7 +-
openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 32 +--
openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 7 +-
openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 6 +-
openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 61 +----
openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 8 +-
openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 72 +-----
openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 6 +-
openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 7 +-
openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 32 +--
openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 | 7 +-
openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 | 6 +-
openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 61 +----
openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 8 +-
openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 72 +-----
openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 6 +-
openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 7 +-
openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 32 +--
openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 | 7 +-
openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 | 6 +-
openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 61 +----
openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 8 +-
openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 6 +-
openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 72 +-----
openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 6 +-
openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 6 +-
openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 7 +-
openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 32 +--
openacc/psb_c_oacc_csr_mat_mod.F90 | 102 +++-----
openacc/psb_c_oacc_ell_mat_mod.F90 | 133 +++-------
openacc/psb_c_oacc_hll_mat_mod.F90 | 133 +++-------
openacc/psb_c_oacc_vect_mod.F90 | 253 ++++++--------------
openacc/psb_d_oacc_csr_mat_mod.F90 | 102 +++-----
openacc/psb_d_oacc_ell_mat_mod.F90 | 133 +++-------
openacc/psb_d_oacc_hll_mat_mod.F90 | 133 +++-------
openacc/psb_d_oacc_vect_mod.F90 | 253 ++++++--------------
openacc/psb_i_oacc_vect_mod.F90 | 85 ++-----
openacc/psb_l_oacc_vect_mod.F90 | 85 ++-----
openacc/psb_s_oacc_csr_mat_mod.F90 | 102 +++-----
openacc/psb_s_oacc_ell_mat_mod.F90 | 133 +++-------
openacc/psb_s_oacc_hll_mat_mod.F90 | 133 +++-------
openacc/psb_s_oacc_vect_mod.F90 | 253 ++++++--------------
openacc/psb_z_oacc_csr_mat_mod.F90 | 102 +++-----
openacc/psb_z_oacc_ell_mat_mod.F90 | 133 +++-------
openacc/psb_z_oacc_hll_mat_mod.F90 | 133 +++-------
openacc/psb_z_oacc_vect_mod.F90 | 253 ++++++--------------
74 files changed, 974 insertions(+), 2612 deletions(-)
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
index 70380c95..a411cc6a 100644
--- a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
@@ -10,11 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
-
- call a%set_dev()
- if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
index 7e664791..a8cd93a0 100644
--- a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_c_oacc_csr_cp_from_fmt
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
index f8c5c39d..30691030 100644
--- a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
index 7ba971b4..b37011c0 100644
--- a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_c_oacc_csr_mv_from_fmt
diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
index 0fd1ed35..db56d9fc 100644
--- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
@@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (n /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
index 7bd17478..836874fe 100644
--- a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
@@ -8,65 +8,14 @@ contains
class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- complex(psb_spk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
info = psb_success_
- hacksize = 1
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(a%get_nrows(), nz_per_row))
- allocate(a%ja(a%get_nrows(), nz_per_row))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- end if
- a%val = czero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row, row_counts(row) + 1) = value
- a%ja(row, row_counts(row) + 1) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(i, j) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- deallocate(row_counts)
-
- call a%set_dev()
+ call a%free_space()
+ call a%psb_c_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
index 7ee231c5..31d6c4b4 100644
--- a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_c_oacc_ell_cp_from_fmt
diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
index 7e703aa2..1ca43435 100644
--- a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
@@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
-
+
+ call a%free_space()
call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
index 7d1f790d..95798429 100644
--- a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_c_oacc_ell_mv_from_fmt
diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
index 4c12cdf8..32391cc8 100644
--- a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
@@ -2,78 +2,20 @@ submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info)
- implicit none
+ implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_c_coo_sparse_mat), intent(in) :: b
+ class(psb_c_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- complex(psb_spk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
-
info = psb_success_
- hacksize = 32 ! Assuming a default hack size of 32
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_per_row * a%get_nrows()))
- allocate(a%ja(nz_per_row * a%get_nrows()))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
- end if
- a%val = czero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
- a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(j + (i - 1) * nz_per_row) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- ! Calculate hkoffs for HLL format
- !$acc parallel loop present(a)
- do i = 1, size(a%hkoffs)
- a%hkoffs(i) = (i - 1) * hacksize
- end do
-
- deallocate(row_counts)
- call a%set_dev()
+ call a%free_space()
+ call a%psb_c_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
index af6cc1d5..e442b668 100644
--- a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_c_oacc_hll_cp_from_fmt
diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
index dec52d40..30d723fe 100644
--- a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
index f2a064cb..0ac69af8 100644
--- a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
@@ -14,11 +14,12 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
-
end subroutine psb_c_oacc_hll_mv_from_fmt
end submodule psb_c_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
index 3b74d11a..68141e42 100644
--- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
@@ -40,28 +40,28 @@ contains
complex(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, idx, k
+ integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
complex(psb_spk_) :: tmp
info = 0
-
- !$acc parallel loop present(val, ja, hkoffs, x, y)
+ !$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
- do k = 0, hksz - 1
- idx = hkoffs(i) + k
- if (idx <= hkoffs(i + 1) - 1) then
- tmp = 0.0_psb_dpk_
- !$acc loop seq
- do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
- if (ja(j) > 0) then
- tmp = tmp + val(j) * x(ja(j))
- end if
- end do
- y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
- end if
+ isz = hkoffs(i + 1) - hkoffs(i)
+ nlc = isz/hksz
+ ir = (i-1)*hksz
+ nr = min(hksz,m-ir)
+ !$acc loop independent private(tmp,ii,ipnt)
+ do ii = 1, nr
+ ipnt = hkoffs(i) + ii
+ tmp = czero
+ !$acc loop seq
+ do j = 1, nlc
+ tmp = tmp + val(ipnt) * x(ja(ipnt))
+ ipnt = ipnt + hksz
+ end do
+ y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
-
end subroutine psb_c_oacc_hll_vect_mv
end submodule psb_c_oacc_hll_vect_mv_impl
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
index ec92f618..50da7692 100644
--- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
@@ -10,11 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
-
- call a%set_dev()
- if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
index 37541ea9..0d35e247 100644
--- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_d_oacc_csr_cp_from_fmt
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
index 2ed9b032..97fa07d1 100644
--- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
index 197ed911..e992f41a 100644
--- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_d_oacc_csr_mv_from_fmt
diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
index 1dca2ba2..0001cc76 100644
--- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
@@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (n /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
index c13d1edd..6c24098e 100644
--- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
@@ -8,65 +8,14 @@ contains
class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- real(psb_dpk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
info = psb_success_
- hacksize = 1
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(a%get_nrows(), nz_per_row))
- allocate(a%ja(a%get_nrows(), nz_per_row))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- end if
- a%val = dzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row, row_counts(row) + 1) = value
- a%ja(row, row_counts(row) + 1) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(i, j) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- deallocate(row_counts)
-
- call a%set_dev()
+ call a%free_space()
+ call a%psb_d_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
index b62a40d5..991681e9 100644
--- a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_d_oacc_ell_cp_from_fmt
diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
index 688d182b..9214ba3f 100644
--- a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
@@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
-
+
+ call a%free_space()
call a%psb_d_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
index 8bda6e6e..292165fc 100644
--- a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_d_oacc_ell_mv_from_fmt
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
index 18bd768b..e39a29b7 100644
--- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
@@ -2,78 +2,20 @@ submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info)
- implicit none
+ implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_d_coo_sparse_mat), intent(in) :: b
+ class(psb_d_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- real(psb_dpk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
-
info = psb_success_
- hacksize = 32 ! Assuming a default hack size of 32
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_per_row * a%get_nrows()))
- allocate(a%ja(nz_per_row * a%get_nrows()))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
- end if
- a%val = dzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
- a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(j + (i - 1) * nz_per_row) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- ! Calculate hkoffs for HLL format
- !$acc parallel loop present(a)
- do i = 1, size(a%hkoffs)
- a%hkoffs(i) = (i - 1) * hacksize
- end do
-
- deallocate(row_counts)
- call a%set_dev()
+ call a%free_space()
+ call a%psb_d_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
index fb99737c..a838e31e 100644
--- a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_d_oacc_hll_cp_from_fmt
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
index 7bf22c13..29494a39 100644
--- a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
index e6615365..e03e2f30 100644
--- a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
@@ -14,11 +14,12 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
-
end subroutine psb_d_oacc_hll_mv_from_fmt
end submodule psb_d_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
index 875b646f..e7c47b7a 100644
--- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
@@ -40,28 +40,28 @@ contains
real(psb_dpk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, idx, k
+ integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
real(psb_dpk_) :: tmp
info = 0
-
- !$acc parallel loop present(val, ja, hkoffs, x, y)
+ !$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
- do k = 0, hksz - 1
- idx = hkoffs(i) + k
- if (idx <= hkoffs(i + 1) - 1) then
- tmp = 0.0_psb_dpk_
- !$acc loop seq
- do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
- if (ja(j) > 0) then
- tmp = tmp + val(j) * x(ja(j))
- end if
- end do
- y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
- end if
+ isz = hkoffs(i + 1) - hkoffs(i)
+ nlc = isz/hksz
+ ir = (i-1)*hksz
+ nr = min(hksz,m-ir)
+ !$acc loop independent private(tmp,ii,ipnt)
+ do ii = 1, nr
+ ipnt = hkoffs(i) + ii
+ tmp = dzero
+ !$acc loop seq
+ do j = 1, nlc
+ tmp = tmp + val(ipnt) * x(ja(ipnt))
+ ipnt = ipnt + hksz
+ end do
+ y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
-
end subroutine psb_d_oacc_hll_vect_mv
end submodule psb_d_oacc_hll_vect_mv_impl
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
index 94ef67b3..3c2fb1cb 100644
--- a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
@@ -10,11 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
-
- call a%set_dev()
- if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
index 2c64b5fe..e47959f8 100644
--- a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_s_oacc_csr_cp_from_fmt
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
index e531d309..fdbf3a0c 100644
--- a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
index a9dc0c70..a7a581b8 100644
--- a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_s_oacc_csr_mv_from_fmt
diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
index c2bbd6b1..13ee1651 100644
--- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
@@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (n /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
index 9aaaff73..0f6fbc48 100644
--- a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
@@ -8,65 +8,14 @@ contains
class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- real(psb_spk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
info = psb_success_
- hacksize = 1
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(a%get_nrows(), nz_per_row))
- allocate(a%ja(a%get_nrows(), nz_per_row))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- end if
- a%val = szero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row, row_counts(row) + 1) = value
- a%ja(row, row_counts(row) + 1) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(i, j) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- deallocate(row_counts)
-
- call a%set_dev()
+ call a%free_space()
+ call a%psb_s_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
index d4c1a233..793c2779 100644
--- a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_s_oacc_ell_cp_from_fmt
diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
index d6bbec13..ba82049f 100644
--- a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
@@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
-
+
+ call a%free_space()
call a%psb_s_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
index ebb82901..df789664 100644
--- a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_s_oacc_ell_mv_from_fmt
diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
index 34a0b5d5..dfba3c6c 100644
--- a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
@@ -2,78 +2,20 @@ submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_s_oacc_hll_cp_from_coo(a, b, info)
- implicit none
+ implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_s_coo_sparse_mat), intent(in) :: b
+ class(psb_s_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- real(psb_spk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
-
info = psb_success_
- hacksize = 32 ! Assuming a default hack size of 32
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_per_row * a%get_nrows()))
- allocate(a%ja(nz_per_row * a%get_nrows()))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
- end if
- a%val = szero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
- a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(j + (i - 1) * nz_per_row) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- ! Calculate hkoffs for HLL format
- !$acc parallel loop present(a)
- do i = 1, size(a%hkoffs)
- a%hkoffs(i) = (i - 1) * hacksize
- end do
-
- deallocate(row_counts)
- call a%set_dev()
+ call a%free_space()
+ call a%psb_s_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
index 4d023f8b..849e03b7 100644
--- a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_s_oacc_hll_cp_from_fmt
diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
index 08b553b7..c22818fa 100644
--- a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_s_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
index d5867289..992b1c7b 100644
--- a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
@@ -14,11 +14,12 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
-
end subroutine psb_s_oacc_hll_mv_from_fmt
end submodule psb_s_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
index 9d9e9197..efe9a9ca 100644
--- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
@@ -40,28 +40,28 @@ contains
real(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, idx, k
+ integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
real(psb_spk_) :: tmp
info = 0
-
- !$acc parallel loop present(val, ja, hkoffs, x, y)
+ !$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
- do k = 0, hksz - 1
- idx = hkoffs(i) + k
- if (idx <= hkoffs(i + 1) - 1) then
- tmp = 0.0_psb_dpk_
- !$acc loop seq
- do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
- if (ja(j) > 0) then
- tmp = tmp + val(j) * x(ja(j))
- end if
- end do
- y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
- end if
+ isz = hkoffs(i + 1) - hkoffs(i)
+ nlc = isz/hksz
+ ir = (i-1)*hksz
+ nr = min(hksz,m-ir)
+ !$acc loop independent private(tmp,ii,ipnt)
+ do ii = 1, nr
+ ipnt = hkoffs(i) + ii
+ tmp = szero
+ !$acc loop seq
+ do j = 1, nlc
+ tmp = tmp + val(ipnt) * x(ja(ipnt))
+ ipnt = ipnt + hksz
+ end do
+ y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
-
end subroutine psb_s_oacc_hll_vect_mv
end submodule psb_s_oacc_hll_vect_mv_impl
diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
index 0485c9ca..6c40d0d2 100644
--- a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
@@ -10,11 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_z_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
-
- call a%set_dev()
- if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
index f2c68816..3025fde2 100644
--- a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_z_oacc_csr_cp_from_fmt
diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
index 44b01b68..6dae625a 100644
--- a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_z_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
index bf777e85..1d7dd723 100644
--- a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irp)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_z_oacc_csr_mv_from_fmt
diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
index b8da5c8f..cb34dce1 100644
--- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
@@ -16,8 +16,8 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (n /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
index e4d3b731..6dd60bd7 100644
--- a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
@@ -8,65 +8,14 @@ contains
class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- complex(psb_dpk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
info = psb_success_
- hacksize = 1
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(a%get_nrows(), nz_per_row))
- allocate(a%ja(a%get_nrows(), nz_per_row))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- end if
- a%val = zzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row, row_counts(row) + 1) = value
- a%ja(row, row_counts(row) + 1) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(i, j) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- deallocate(row_counts)
-
- call a%set_dev()
+ call a%free_space()
+ call a%psb_z_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
index 98404ae2..60d94bb2 100644
--- a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_z_oacc_ell_cp_from_fmt
diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
index 26388e5e..db70b944 100644
--- a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
@@ -9,11 +9,13 @@ contains
integer(psb_ipk_), intent(out) :: info
info = psb_success_
-
+
+ call a%free_space()
call a%psb_z_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
index e0f75828..f99b3817 100644
--- a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_z_oacc_ell_mv_from_fmt
diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
index 62be2252..e018e762 100644
--- a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
@@ -2,78 +2,20 @@ submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_cp_from_coo_impl
use psb_base_mod
contains
module subroutine psb_z_oacc_hll_cp_from_coo(a, b, info)
- implicit none
+ implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
- class(psb_z_coo_sparse_mat), intent(in) :: b
+ class(psb_z_coo_sparse_mat), intent(in) :: b
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, k, row, col, nz_per_row
- complex(psb_dpk_) :: value
- integer(psb_ipk_), allocatable :: row_counts(:)
- integer(psb_ipk_) :: hacksize, nza
-
info = psb_success_
- hacksize = 32 ! Assuming a default hack size of 32
-
- call a%set_nrows(b%get_nrows())
- call a%set_ncols(b%get_ncols())
- nz_per_row = a%nzt
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_per_row * a%get_nrows()))
- allocate(a%ja(nz_per_row * a%get_nrows()))
- allocate(a%irn(a%get_nrows()))
- allocate(a%idiag(a%get_nrows()))
- allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize))
- end if
- a%val = zzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- allocate(row_counts(a%get_nrows()))
- row_counts = 0
-
- nza = b%get_nzeros()
-
- !$acc parallel loop present(b, a, row_counts)
- do k = 1, nza
- row = b%ia(k)
- col = b%ja(k)
- value = b%val(k)
- if (row_counts(row) < nz_per_row) then
- a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value
- a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col
- row_counts(row) = row_counts(row) + 1
- else
- info = psb_err_invalid_mat_state_
- !goto 9999
- end if
- end do
-
- a%irn = row_counts
-
- !$acc parallel loop present(a)
- do i = 1, a%get_nrows()
- do j = 1, nz_per_row
- if (a%ja(j + (i - 1) * nz_per_row) == i) then
- a%idiag(i) = j
- exit
- end if
- end do
- end do
-
- ! Calculate hkoffs for HLL format
- !$acc parallel loop present(a)
- do i = 1, size(a%hkoffs)
- a%hkoffs(i) = (i - 1) * hacksize
- end do
-
- deallocate(row_counts)
- call a%set_dev()
+ call a%free_space()
+ call a%psb_z_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
index f267e1c6..7b18b255 100644
--- a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
end subroutine psb_z_oacc_hll_cp_from_fmt
diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
index 2ff574d3..267f13f3 100644
--- a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,12 @@ contains
info = psb_success_
+ call a%free_space()
call a%psb_z_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
return
diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
index 5fa00e38..151dc6ce 100644
--- a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
@@ -14,11 +14,12 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
+ call a%free_space()
call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
-
- !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs)
+ call a%sync_space()
+ call a%set_host()
+ call a%sync()
end select
-
end subroutine psb_z_oacc_hll_mv_from_fmt
end submodule psb_z_oacc_hll_mv_from_fmt_impl
diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
index 89d970c0..dbadf034 100644
--- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
@@ -40,28 +40,28 @@ contains
complex(psb_dpk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
- integer(psb_ipk_) :: i, j, idx, k
+ integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
complex(psb_dpk_) :: tmp
info = 0
-
- !$acc parallel loop present(val, ja, hkoffs, x, y)
+ !$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
- do k = 0, hksz - 1
- idx = hkoffs(i) + k
- if (idx <= hkoffs(i + 1) - 1) then
- tmp = 0.0_psb_dpk_
- !$acc loop seq
- do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz
- if (ja(j) > 0) then
- tmp = tmp + val(j) * x(ja(j))
- end if
- end do
- y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz)
- end if
+ isz = hkoffs(i + 1) - hkoffs(i)
+ nlc = isz/hksz
+ ir = (i-1)*hksz
+ nr = min(hksz,m-ir)
+ !$acc loop independent private(tmp,ii,ipnt)
+ do ii = 1, nr
+ ipnt = hkoffs(i) + ii
+ tmp = zzero
+ !$acc loop seq
+ do j = 1, nlc
+ tmp = tmp + val(ipnt) * x(ja(ipnt))
+ ipnt = ipnt + hksz
+ end do
+ y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
-
end subroutine psb_z_oacc_hll_vect_mv
end submodule psb_z_oacc_hll_vect_mv_impl
diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90
index 00e79570..07734762 100644
--- a/openacc/psb_c_oacc_csr_mat_mod.F90
+++ b/openacc/psb_c_oacc_csr_mat_mod.F90
@@ -1,6 +1,7 @@
module psb_c_oacc_csr_mat_mod
use iso_c_binding
+ use openacc
use psb_c_mat_mod
use psb_c_oacc_vect_mod
!use oaccsparse_mod
@@ -35,6 +36,7 @@ module psb_c_oacc_csr_mat_mod
procedure, pass(a) :: set_host => c_oacc_csr_set_host
procedure, pass(a) :: set_sync => c_oacc_csr_set_sync
procedure, pass(a) :: set_dev => c_oacc_csr_set_dev
+ procedure, pass(a) :: free_space => c_oacc_csr_free_space
procedure, pass(a) :: sync_space => c_oacc_csr_sync_space
procedure, pass(a) :: sync => c_oacc_csr_sync
end type psb_c_oacc_csr_sparse_mat
@@ -154,22 +156,26 @@ module psb_c_oacc_csr_mat_mod
contains
- subroutine c_oacc_csr_free(a)
+ subroutine c_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+
+ return
+ end subroutine c_oacc_csr_free_space
+
+ subroutine c_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_c_csr_sparse_mat%free()
return
@@ -193,7 +199,7 @@ contains
function c_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'CSR_oacc'
+ res = 'CSROA'
end function c_oacc_csr_get_fmt
subroutine c_oacc_csr_all(m, n, nz, a, info)
@@ -202,19 +208,8 @@ contains
class(psb_c_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
- info = 0
- if (allocated(a%val)) then
- !$acc exit data delete(a%val) finalize
- deallocate(a%val, stat=info)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja) finalize
- deallocate(a%ja, stat=info)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp) finalize
- deallocate(a%irp, stat=info)
- end if
+ info = 0
+ call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@@ -274,26 +269,9 @@ contains
subroutine c_oacc_csr_sync_space(a)
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call c_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irp)) then
- call i_oacc_create_dev(a%irp)
- end if
- contains
- subroutine c_oacc_create_dev(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine c_oacc_create_dev
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irp)) call acc_create(a%irp)
end subroutine c_oacc_csr_sync_space
subroutine c_oacc_csr_sync(a)
@@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
- call c_oacc_csr_to_host(a%val)
- call i_oacc_csr_to_host(a%ja)
- call i_oacc_csr_to_host(a%irp)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irp)
else if (a%is_host()) then
- call c_oacc_csr_to_dev(a%val)
- call i_oacc_csr_to_dev(a%ja)
- call i_oacc_csr_to_dev(a%irp)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine c_oacc_csr_sync
- subroutine c_oacc_csr_to_dev(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine c_oacc_csr_to_dev
-
- subroutine c_oacc_csr_to_host(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine c_oacc_csr_to_host
-
- subroutine i_oacc_csr_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_csr_to_dev
-
- subroutine i_oacc_csr_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_csr_to_host
-
end module psb_c_oacc_csr_mat_mod
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
index 5e5dc302..d23d4d0a 100644
--- a/openacc/psb_c_oacc_ell_mat_mod.F90
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_c_oacc_ell_mat_mod
use iso_c_binding
+ use openacc
use psb_c_mat_mod
use psb_c_ell_mat_mod
use psb_c_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_c_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => c_oacc_ell_set_dev
procedure, pass(a) :: sync_space => c_oacc_ell_sync_space
procedure, pass(a) :: sync => c_oacc_ell_sync
+ procedure, pass(a) :: free_space => c_oacc_ell_free_space
procedure, pass(a) :: free => c_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv
@@ -152,31 +154,32 @@ module psb_c_oacc_ell_mat_mod
contains
- subroutine c_oacc_ell_free(a)
+ subroutine c_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+
+ return
+ end subroutine c_oacc_ell_free_space
+
+ subroutine c_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_c_ell_sparse_mat%free()
return
end subroutine c_oacc_ell_free
-
function c_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
@@ -196,41 +199,12 @@ contains
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call c_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
-
- contains
- subroutine c_oacc_create_dev(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine c_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine c_oacc_ell_sync_space
-
-
function c_oacc_ell_is_host(a) result(res)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
@@ -279,7 +253,7 @@ contains
function c_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'ELL_oacc'
+ res = 'ELLOA'
end function c_oacc_ell_get_fmt
subroutine c_oacc_ell_sync(a)
@@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
- call c_oacc_ell_to_host(a%val)
- call i_oacc_ell_to_host(a%ja)
- call i_oacc_ell_to_host_scalar(a%irn)
- call i_oacc_ell_to_host_scalar(a%idiag)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
else if (a%is_host()) then
- call c_oacc_ell_to_dev(a%val)
- call i_oacc_ell_to_dev(a%ja)
- call i_oacc_ell_to_dev_scalar(a%irn)
- call i_oacc_ell_to_dev_scalar(a%idiag)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine c_oacc_ell_sync
- subroutine c_oacc_ell_to_dev_scalar(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine c_oacc_ell_to_dev_scalar
-
- subroutine c_oacc_ell_to_dev(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine c_oacc_ell_to_dev
-
- subroutine c_oacc_ell_to_host(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine c_oacc_ell_to_host
-
- subroutine c_oacc_ell_to_host_scalar(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine c_oacc_ell_to_host_scalar
-
- subroutine i_oacc_ell_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev
-
- subroutine i_oacc_ell_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev_scalar
-
- subroutine i_oacc_ell_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host
-
- subroutine i_oacc_ell_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host_scalar
end module psb_c_oacc_ell_mat_mod
diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90
index faad0a1b..2d2095bf 100644
--- a/openacc/psb_c_oacc_hll_mat_mod.F90
+++ b/openacc/psb_c_oacc_hll_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_c_oacc_hll_mat_mod
use iso_c_binding
+ use openacc
use psb_c_mat_mod
use psb_c_hll_mat_mod
use psb_c_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_c_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => c_oacc_hll_set_dev
procedure, pass(a) :: sync_space => c_oacc_hll_sync_space
procedure, pass(a) :: sync => c_oacc_hll_sync
+ procedure, pass(a) :: free_space => c_oacc_hll_free_space
procedure, pass(a) :: free => c_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv
@@ -152,28 +154,28 @@ module psb_c_oacc_hll_mat_mod
contains
- subroutine c_oacc_hll_free(a)
+ subroutine c_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- !$acc exit data delete(a%hkoffs)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+ if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
+
+ return
+ end subroutine c_oacc_hll_free_space
+
+ subroutine c_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_c_hll_sparse_mat%free()
return
@@ -244,48 +246,18 @@ contains
function c_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'HLL_oacc'
+ res = 'HLLOA'
end function c_oacc_hll_get_fmt
subroutine c_oacc_hll_sync_space(a)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call c_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- call i_oacc_create_dev_scalar(a%hkoffs)
- end if
-
- contains
- subroutine c_oacc_create_dev(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine c_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
-
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
+ if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine c_oacc_hll_sync_space
@@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
- call c_oacc_hll_to_host(a%val)
- call i_oacc_hll_to_host(a%ja)
- call i_oacc_hll_to_host_scalar(a%irn)
- call i_oacc_hll_to_host_scalar(a%idiag)
- call i_oacc_hll_to_host_scalar(a%hkoffs)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
+ call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call c_oacc_hll_to_dev(a%val)
- call i_oacc_hll_to_dev(a%ja)
- call i_oacc_hll_to_dev_scalar(a%irn)
- call i_oacc_hll_to_dev_scalar(a%idiag)
- call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
+ call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine c_oacc_hll_sync
- subroutine c_oacc_hll_to_host(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine c_oacc_hll_to_host
-
- subroutine c_oacc_hll_to_dev(v)
- implicit none
- complex(psb_spk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine c_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host
-
- subroutine i_oacc_hll_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host_scalar
-
- subroutine i_oacc_hll_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev_scalar
-
-
end module psb_c_oacc_hll_mat_mod
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 7362ba0e..79ff0ca3 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -59,7 +59,7 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: asum => c_oacc_asum
procedure, pass(x) :: absval1 => c_oacc_absval1
procedure, pass(x) :: absval2 => c_oacc_absval2
-
+ final :: c_oacc_final_vect_free
end type psb_c_vect_oacc
interface
@@ -164,21 +164,25 @@ contains
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_spk_) :: res
- real(psb_spk_) :: mx
+ real(psb_spk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
- mx = c_oacc_amax(n,x)
- res = c_inner_oacc_nrm2(n, mx, x%v)
+!!$ write(0,*)'oacc_nrm2'
+ res = c_inner_oacc_nrm2(n, x%v)
contains
- function c_inner_oacc_nrm2(n, mx,x) result(res)
+ function c_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
complex(psb_spk_) :: x(:)
- real(psb_spk_) :: mx, res
- real(psb_spk_) :: sum
+ real(psb_spk_) :: res
+ real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
- sum = 0.0
+ mx = szero
+ !$acc parallel loop reduction(max:mx)
+ do i = 1, n
+ if (abs(x(i)) > mx) mx = abs(x(i))
+ end do
+ sum = szero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@@ -203,7 +207,7 @@ contains
real(psb_spk_) :: res
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
- max_val = -huge(0.0)
+ max_val = szero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@@ -228,7 +232,7 @@ contains
complex(psb_spk_) :: x(:)
real(psb_spk_) :: res
integer(psb_ipk_) :: i
- res = 0.0
+ res = szero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine c_oacc_mlt_a_2
-
-!!$ subroutine c_oacc_mlt_v(x, y, info)
-!!$ implicit none
-!!$ class(psb_c_base_vect_type), intent(inout) :: x
-!!$ class(psb_c_vect_oacc), intent(inout) :: y
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ integer(psb_ipk_) :: i, n
-!!$
-!!$ info = 0
-!!$ n = min(x%get_nrows(), y%get_nrows())
-!!$ select type(xx => x)
-!!$ type is (psb_c_base_vect_type)
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end select
-!!$ end subroutine c_oacc_mlt_v
-!!$
-!!$ subroutine c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
-!!$ use psi_serial_mod
-!!$ use psb_string_mod
-!!$ implicit none
-!!$ complex(psb_spk_), intent(in) :: alpha, beta
-!!$ class(psb_c_base_vect_type), intent(inout) :: x
-!!$ class(psb_c_base_vect_type), intent(inout) :: y
-!!$ class(psb_c_vect_oacc), intent(inout) :: z
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ character(len=1), intent(in), optional :: conjgx, conjgy
-!!$ integer(psb_ipk_) :: i, n
-!!$ logical :: conjgx_, conjgy_
-!!$
-!!$ conjgx_ = .false.
-!!$ conjgy_ = .false.
-!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
-!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
-!!$
-!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
-!!$
-!!$ info = 0
-!!$ select type(xx => x)
-!!$ class is (psb_c_vect_oacc)
-!!$ select type (yy => y)
-!!$ class is (psb_c_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync()
-!!$ if (yy%is_host()) call yy%sync()
-!!$ if ((beta /= czero) .and. (z%is_host())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_dev()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ class default
-!!$ if (x%is_dev()) call x%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= czero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ end subroutine c_oacc_mlt_v_2
-
-
subroutine c_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= czero) .and. (y%is_dev())) call y%sync()
- !$acc parallel loop
+
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
- write(0,*)'upd_xyz'
+
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_c_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
+ class is (psb_c_vect_oacc)
+ select type(zz => z)
class is (psb_c_vect_oacc)
- select type(zz => z)
- class is (psb_c_vect_oacc)
- if ((beta /= czero) .and. yy%is_host()) call yy%sync()
- if ((delta /= czero) .and. zz%is_host()) call zz%sync()
- if (xx%is_host()) call xx%sync()
- nx = size(xx%v)
- ny = size(yy%v)
- nz = size(zz%v)
- if ((nx < m) .or. (ny < m) .or. (nz < m)) then
- info = psb_err_internal_error_
- else
- !$acc parallel loop
- do i = 1, m
- yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
- zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
- end do
- end if
- call yy%set_dev()
- call zz%set_dev()
- gpu_done = .true.
- end select
+ if ((beta /= czero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= czero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
end select
+ end select
end select
if (.not. gpu_done) then
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
- if (z%is_host()) call z%sync()
- call y%axpby(m, alpha, x, beta, info)
- call z%axpby(m, gamma, y, delta, info)
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
end if
end subroutine c_oacc_upd_xyz
@@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_c_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
- !$acc update device(x%v)
+
end subroutine c_oacc_ins_a
@@ -687,16 +605,14 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
-
+ call x%sync_space()
+
end subroutine c_oacc_bld_mn
@@ -707,6 +623,7 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
-
x%v(:) = this(:)
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
+ call x%sync_space()
end subroutine c_oacc_bld_x
@@ -848,54 +761,21 @@ contains
end function c_oacc_dot_a
- ! subroutine c_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_c_vect_oacc), intent(inout) :: x
- ! complex(psb_spk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine c_oacc_set_vect
-
- subroutine c_oacc_to_dev(v)
- implicit none
- complex(psb_spk_) :: v(:)
- !$acc update device(v)
- end subroutine c_oacc_to_dev
-
- subroutine c_oacc_to_host(v)
- implicit none
- complex(psb_spk_) :: v(:)
- !$acc update self(v)
- end subroutine c_oacc_to_host
subroutine c_oacc_sync_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) then
- if (.not.acc_is_present(x%v)) call c_oacc_create_dev(x%v)
- end if
- contains
- subroutine c_oacc_create_dev(v)
- implicit none
- complex(psb_spk_) :: v(:)
- !$acc enter data copyin(v)
- end subroutine c_oacc_create_dev
+ if (allocated(x%v)) call acc_create(x%v)
end subroutine c_oacc_sync_space
subroutine c_oacc_sync(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call c_oacc_to_host(x%v)
+ call acc_update_self(x%v)
end if
if (x%is_host()) then
- call c_oacc_to_dev(x%v)
+ call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine c_oacc_sync
@@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'c_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
+ call x%set_host()
+ call x%sync_space()
end subroutine c_oacc_vect_all
+ subroutine c_oacc_final_vect_free(x)
+ implicit none
+ type(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+ info = 0
+ if (allocated(x%v)) then
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine c_oacc_final_vect_free
+
subroutine c_oacc_vect_free(x, info)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
-
end subroutine c_oacc_vect_free
function c_oacc_get_size(x) result(res)
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index 8b7e111e..74031e89 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -1,6 +1,7 @@
module psb_d_oacc_csr_mat_mod
use iso_c_binding
+ use openacc
use psb_d_mat_mod
use psb_d_oacc_vect_mod
!use oaccsparse_mod
@@ -35,6 +36,7 @@ module psb_d_oacc_csr_mat_mod
procedure, pass(a) :: set_host => d_oacc_csr_set_host
procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
+ procedure, pass(a) :: free_space => d_oacc_csr_free_space
procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
procedure, pass(a) :: sync => d_oacc_csr_sync
end type psb_d_oacc_csr_sparse_mat
@@ -154,22 +156,26 @@ module psb_d_oacc_csr_mat_mod
contains
- subroutine d_oacc_csr_free(a)
+ subroutine d_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+
+ return
+ end subroutine d_oacc_csr_free_space
+
+ subroutine d_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_d_csr_sparse_mat%free()
return
@@ -193,7 +199,7 @@ contains
function d_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'CSR_oacc'
+ res = 'CSROA'
end function d_oacc_csr_get_fmt
subroutine d_oacc_csr_all(m, n, nz, a, info)
@@ -202,19 +208,8 @@ contains
class(psb_d_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
- info = 0
- if (allocated(a%val)) then
- !$acc exit data delete(a%val) finalize
- deallocate(a%val, stat=info)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja) finalize
- deallocate(a%ja, stat=info)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp) finalize
- deallocate(a%irp, stat=info)
- end if
+ info = 0
+ call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@@ -274,26 +269,9 @@ contains
subroutine d_oacc_csr_sync_space(a)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call d_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irp)) then
- call i_oacc_create_dev(a%irp)
- end if
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irp)) call acc_create(a%irp)
end subroutine d_oacc_csr_sync_space
subroutine d_oacc_csr_sync(a)
@@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
- call d_oacc_csr_to_host(a%val)
- call i_oacc_csr_to_host(a%ja)
- call i_oacc_csr_to_host(a%irp)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irp)
else if (a%is_host()) then
- call d_oacc_csr_to_dev(a%val)
- call i_oacc_csr_to_dev(a%ja)
- call i_oacc_csr_to_dev(a%irp)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine d_oacc_csr_sync
- subroutine d_oacc_csr_to_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_csr_to_dev
-
- subroutine d_oacc_csr_to_host(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_csr_to_host
-
- subroutine i_oacc_csr_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_csr_to_dev
-
- subroutine i_oacc_csr_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_csr_to_host
-
end module psb_d_oacc_csr_mat_mod
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 962ad2db..45ffc92d 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_d_oacc_ell_mat_mod
use iso_c_binding
+ use openacc
use psb_d_mat_mod
use psb_d_ell_mat_mod
use psb_d_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_d_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
procedure, pass(a) :: sync => d_oacc_ell_sync
+ procedure, pass(a) :: free_space => d_oacc_ell_free_space
procedure, pass(a) :: free => d_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
@@ -152,31 +154,32 @@ module psb_d_oacc_ell_mat_mod
contains
- subroutine d_oacc_ell_free(a)
+ subroutine d_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+
+ return
+ end subroutine d_oacc_ell_free_space
+
+ subroutine d_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_d_ell_sparse_mat%free()
return
end subroutine d_oacc_ell_free
-
function d_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
@@ -196,41 +199,12 @@ contains
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call d_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
-
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine d_oacc_ell_sync_space
-
-
function d_oacc_ell_is_host(a) result(res)
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
@@ -279,7 +253,7 @@ contains
function d_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'ELL_oacc'
+ res = 'ELLOA'
end function d_oacc_ell_get_fmt
subroutine d_oacc_ell_sync(a)
@@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
- call d_oacc_ell_to_host(a%val)
- call i_oacc_ell_to_host(a%ja)
- call i_oacc_ell_to_host_scalar(a%irn)
- call i_oacc_ell_to_host_scalar(a%idiag)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
else if (a%is_host()) then
- call d_oacc_ell_to_dev(a%val)
- call i_oacc_ell_to_dev(a%ja)
- call i_oacc_ell_to_dev_scalar(a%irn)
- call i_oacc_ell_to_dev_scalar(a%idiag)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine d_oacc_ell_sync
- subroutine d_oacc_ell_to_dev_scalar(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_ell_to_dev_scalar
-
- subroutine d_oacc_ell_to_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine d_oacc_ell_to_dev
-
- subroutine d_oacc_ell_to_host(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine d_oacc_ell_to_host
-
- subroutine d_oacc_ell_to_host_scalar(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_ell_to_host_scalar
-
- subroutine i_oacc_ell_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev
-
- subroutine i_oacc_ell_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev_scalar
-
- subroutine i_oacc_ell_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host
-
- subroutine i_oacc_ell_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host_scalar
end module psb_d_oacc_ell_mat_mod
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
index b1c36a65..8009f085 100644
--- a/openacc/psb_d_oacc_hll_mat_mod.F90
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_d_oacc_hll_mat_mod
use iso_c_binding
+ use openacc
use psb_d_mat_mod
use psb_d_hll_mat_mod
use psb_d_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_d_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
procedure, pass(a) :: sync_space => d_oacc_hll_sync_space
procedure, pass(a) :: sync => d_oacc_hll_sync
+ procedure, pass(a) :: free_space => d_oacc_hll_free_space
procedure, pass(a) :: free => d_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
@@ -152,28 +154,28 @@ module psb_d_oacc_hll_mat_mod
contains
- subroutine d_oacc_hll_free(a)
+ subroutine d_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- !$acc exit data delete(a%hkoffs)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+ if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
+
+ return
+ end subroutine d_oacc_hll_free_space
+
+ subroutine d_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_d_hll_sparse_mat%free()
return
@@ -244,48 +246,18 @@ contains
function d_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'HLL_oacc'
+ res = 'HLLOA'
end function d_oacc_hll_get_fmt
subroutine d_oacc_hll_sync_space(a)
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call d_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- call i_oacc_create_dev_scalar(a%hkoffs)
- end if
-
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
-
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
+ if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine d_oacc_hll_sync_space
@@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
- call d_oacc_hll_to_host(a%val)
- call i_oacc_hll_to_host(a%ja)
- call i_oacc_hll_to_host_scalar(a%irn)
- call i_oacc_hll_to_host_scalar(a%idiag)
- call i_oacc_hll_to_host_scalar(a%hkoffs)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
+ call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call d_oacc_hll_to_dev(a%val)
- call i_oacc_hll_to_dev(a%ja)
- call i_oacc_hll_to_dev_scalar(a%irn)
- call i_oacc_hll_to_dev_scalar(a%idiag)
- call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
+ call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine d_oacc_hll_sync
- subroutine d_oacc_hll_to_host(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_hll_to_host
-
- subroutine d_oacc_hll_to_dev(v)
- implicit none
- real(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host
-
- subroutine i_oacc_hll_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host_scalar
-
- subroutine i_oacc_hll_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev_scalar
-
-
end module psb_d_oacc_hll_mat_mod
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 9ecbccb4..c7804bc1 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -59,7 +59,7 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: asum => d_oacc_asum
procedure, pass(x) :: absval1 => d_oacc_absval1
procedure, pass(x) :: absval2 => d_oacc_absval2
-
+ final :: d_oacc_final_vect_free
end type psb_d_vect_oacc
interface
@@ -164,21 +164,25 @@ contains
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- real(psb_dpk_) :: mx
+ real(psb_dpk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
- mx = d_oacc_amax(n,x)
- res = d_inner_oacc_nrm2(n, mx, x%v)
+!!$ write(0,*)'oacc_nrm2'
+ res = d_inner_oacc_nrm2(n, x%v)
contains
- function d_inner_oacc_nrm2(n, mx,x) result(res)
+ function d_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
real(psb_dpk_) :: x(:)
- real(psb_dpk_) :: mx, res
- real(psb_dpk_) :: sum
+ real(psb_dpk_) :: res
+ real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
- sum = 0.0
+ mx = dzero
+ !$acc parallel loop reduction(max:mx)
+ do i = 1, n
+ if (abs(x(i)) > mx) mx = abs(x(i))
+ end do
+ sum = dzero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@@ -203,7 +207,7 @@ contains
real(psb_dpk_) :: res
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
- max_val = -huge(0.0)
+ max_val = dzero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@@ -228,7 +232,7 @@ contains
real(psb_dpk_) :: x(:)
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
- res = 0.0
+ res = dzero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine d_oacc_mlt_a_2
-
-!!$ subroutine d_oacc_mlt_v(x, y, info)
-!!$ implicit none
-!!$ class(psb_d_base_vect_type), intent(inout) :: x
-!!$ class(psb_d_vect_oacc), intent(inout) :: y
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ integer(psb_ipk_) :: i, n
-!!$
-!!$ info = 0
-!!$ n = min(x%get_nrows(), y%get_nrows())
-!!$ select type(xx => x)
-!!$ type is (psb_d_base_vect_type)
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end select
-!!$ end subroutine d_oacc_mlt_v
-!!$
-!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
-!!$ use psi_serial_mod
-!!$ use psb_string_mod
-!!$ implicit none
-!!$ real(psb_dpk_), intent(in) :: alpha, beta
-!!$ class(psb_d_base_vect_type), intent(inout) :: x
-!!$ class(psb_d_base_vect_type), intent(inout) :: y
-!!$ class(psb_d_vect_oacc), intent(inout) :: z
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ character(len=1), intent(in), optional :: conjgx, conjgy
-!!$ integer(psb_ipk_) :: i, n
-!!$ logical :: conjgx_, conjgy_
-!!$
-!!$ conjgx_ = .false.
-!!$ conjgy_ = .false.
-!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
-!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
-!!$
-!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
-!!$
-!!$ info = 0
-!!$ select type(xx => x)
-!!$ class is (psb_d_vect_oacc)
-!!$ select type (yy => y)
-!!$ class is (psb_d_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync()
-!!$ if (yy%is_host()) call yy%sync()
-!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_dev()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ class default
-!!$ if (x%is_dev()) call x%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ end subroutine d_oacc_mlt_v_2
-
-
subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= dzero) .and. (y%is_dev())) call y%sync()
- !$acc parallel loop
+
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
- write(0,*)'upd_xyz'
+
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_d_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
+ class is (psb_d_vect_oacc)
+ select type(zz => z)
class is (psb_d_vect_oacc)
- select type(zz => z)
- class is (psb_d_vect_oacc)
- if ((beta /= dzero) .and. yy%is_host()) call yy%sync()
- if ((delta /= dzero) .and. zz%is_host()) call zz%sync()
- if (xx%is_host()) call xx%sync()
- nx = size(xx%v)
- ny = size(yy%v)
- nz = size(zz%v)
- if ((nx < m) .or. (ny < m) .or. (nz < m)) then
- info = psb_err_internal_error_
- else
- !$acc parallel loop
- do i = 1, m
- yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
- zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
- end do
- end if
- call yy%set_dev()
- call zz%set_dev()
- gpu_done = .true.
- end select
+ if ((beta /= dzero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= dzero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
end select
+ end select
end select
if (.not. gpu_done) then
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
- if (z%is_host()) call z%sync()
- call y%axpby(m, alpha, x, beta, info)
- call z%axpby(m, gamma, y, delta, info)
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
end if
end subroutine d_oacc_upd_xyz
@@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
- !$acc update device(x%v)
+
end subroutine d_oacc_ins_a
@@ -687,16 +605,14 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
-
+ call x%sync_space()
+
end subroutine d_oacc_bld_mn
@@ -707,6 +623,7 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
-
x%v(:) = this(:)
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
+ call x%sync_space()
end subroutine d_oacc_bld_x
@@ -848,54 +761,21 @@ contains
end function d_oacc_dot_a
- ! subroutine d_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_d_vect_oacc), intent(inout) :: x
- ! real(psb_dpk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine d_oacc_set_vect
-
- subroutine d_oacc_to_dev(v)
- implicit none
- real(psb_dpk_) :: v(:)
- !$acc update device(v)
- end subroutine d_oacc_to_dev
-
- subroutine d_oacc_to_host(v)
- implicit none
- real(psb_dpk_) :: v(:)
- !$acc update self(v)
- end subroutine d_oacc_to_host
subroutine d_oacc_sync_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) then
- if (.not.acc_is_present(x%v)) call d_oacc_create_dev(x%v)
- end if
- contains
- subroutine d_oacc_create_dev(v)
- implicit none
- real(psb_dpk_) :: v(:)
- !$acc enter data copyin(v)
- end subroutine d_oacc_create_dev
+ if (allocated(x%v)) call acc_create(x%v)
end subroutine d_oacc_sync_space
subroutine d_oacc_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call d_oacc_to_host(x%v)
+ call acc_update_self(x%v)
end if
if (x%is_host()) then
- call d_oacc_to_dev(x%v)
+ call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine d_oacc_sync
@@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'd_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
+ call x%set_host()
+ call x%sync_space()
end subroutine d_oacc_vect_all
+ subroutine d_oacc_final_vect_free(x)
+ implicit none
+ type(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+ info = 0
+ if (allocated(x%v)) then
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine d_oacc_final_vect_free
+
subroutine d_oacc_vect_free(x, info)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
-
end subroutine d_oacc_vect_free
function d_oacc_get_size(x) result(res)
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 3dbc48f1..8c813134 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -42,7 +42,7 @@ module psb_i_oacc_vect_mod
procedure, pass(x) :: get_size => i_oacc_get_size
-
+ final :: i_oacc_final_vect_free
end type psb_i_vect_oacc
@@ -252,7 +252,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_i_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
- !$acc update device(x%v)
+
end subroutine i_oacc_ins_a
@@ -263,16 +263,14 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
-
+ call x%sync_space()
+
end subroutine i_oacc_bld_mn
@@ -283,6 +281,7 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@@ -290,13 +289,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
-
x%v(:) = this(:)
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
+ call x%sync_space()
end subroutine i_oacc_bld_x
@@ -370,54 +365,21 @@ contains
end function i_oacc_get_fmt
- ! subroutine i_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_i_vect_oacc), intent(inout) :: x
- ! integer(psb_ipk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine i_oacc_set_vect
-
- subroutine i_oacc_to_dev(v)
- implicit none
- integer(psb_ipk_) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_to_dev
-
- subroutine i_oacc_to_host(v)
- implicit none
- integer(psb_ipk_) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_to_host
subroutine i_oacc_sync_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) then
- if (.not.acc_is_present(x%v)) call i_oacc_create_dev(x%v)
- end if
- contains
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
+ if (allocated(x%v)) call acc_create(x%v)
end subroutine i_oacc_sync_space
subroutine i_oacc_sync(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call i_oacc_to_host(x%v)
+ call acc_update_self(x%v)
end if
if (x%is_host()) then
- call i_oacc_to_dev(x%v)
+ call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine i_oacc_sync
@@ -476,33 +438,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'i_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
+ call x%set_host()
+ call x%sync_space()
end subroutine i_oacc_vect_all
+ subroutine i_oacc_final_vect_free(x)
+ implicit none
+ type(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+ info = 0
+ if (allocated(x%v)) then
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine i_oacc_final_vect_free
+
subroutine i_oacc_vect_free(x, info)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
-
end subroutine i_oacc_vect_free
function i_oacc_get_size(x) result(res)
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index cdf28366..9ff100bc 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -44,7 +44,7 @@ module psb_l_oacc_vect_mod
procedure, pass(x) :: get_size => l_oacc_get_size
-
+ final :: l_oacc_final_vect_free
end type psb_l_vect_oacc
@@ -254,7 +254,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_l_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
- !$acc update device(x%v)
+
end subroutine l_oacc_ins_a
@@ -265,16 +265,14 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
-
+ call x%sync_space()
+
end subroutine l_oacc_bld_mn
@@ -285,6 +283,7 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@@ -292,13 +291,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
-
x%v(:) = this(:)
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
+ call x%sync_space()
end subroutine l_oacc_bld_x
@@ -372,54 +367,21 @@ contains
end function l_oacc_get_fmt
- ! subroutine l_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_l_vect_oacc), intent(inout) :: x
- ! integer(psb_lpk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine l_oacc_set_vect
-
- subroutine l_oacc_to_dev(v)
- implicit none
- integer(psb_lpk_) :: v(:)
- !$acc update device(v)
- end subroutine l_oacc_to_dev
-
- subroutine l_oacc_to_host(v)
- implicit none
- integer(psb_lpk_) :: v(:)
- !$acc update self(v)
- end subroutine l_oacc_to_host
subroutine l_oacc_sync_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) then
- if (.not.acc_is_present(x%v)) call l_oacc_create_dev(x%v)
- end if
- contains
- subroutine l_oacc_create_dev(v)
- implicit none
- integer(psb_lpk_) :: v(:)
- !$acc enter data copyin(v)
- end subroutine l_oacc_create_dev
+ if (allocated(x%v)) call acc_create(x%v)
end subroutine l_oacc_sync_space
subroutine l_oacc_sync(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call l_oacc_to_host(x%v)
+ call acc_update_self(x%v)
end if
if (x%is_host()) then
- call l_oacc_to_dev(x%v)
+ call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine l_oacc_sync
@@ -478,33 +440,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'l_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
+ call x%set_host()
+ call x%sync_space()
end subroutine l_oacc_vect_all
+ subroutine l_oacc_final_vect_free(x)
+ implicit none
+ type(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+ info = 0
+ if (allocated(x%v)) then
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine l_oacc_final_vect_free
+
subroutine l_oacc_vect_free(x, info)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
-
end subroutine l_oacc_vect_free
function l_oacc_get_size(x) result(res)
diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90
index 89b10d08..b577daac 100644
--- a/openacc/psb_s_oacc_csr_mat_mod.F90
+++ b/openacc/psb_s_oacc_csr_mat_mod.F90
@@ -1,6 +1,7 @@
module psb_s_oacc_csr_mat_mod
use iso_c_binding
+ use openacc
use psb_s_mat_mod
use psb_s_oacc_vect_mod
!use oaccsparse_mod
@@ -35,6 +36,7 @@ module psb_s_oacc_csr_mat_mod
procedure, pass(a) :: set_host => s_oacc_csr_set_host
procedure, pass(a) :: set_sync => s_oacc_csr_set_sync
procedure, pass(a) :: set_dev => s_oacc_csr_set_dev
+ procedure, pass(a) :: free_space => s_oacc_csr_free_space
procedure, pass(a) :: sync_space => s_oacc_csr_sync_space
procedure, pass(a) :: sync => s_oacc_csr_sync
end type psb_s_oacc_csr_sparse_mat
@@ -154,22 +156,26 @@ module psb_s_oacc_csr_mat_mod
contains
- subroutine s_oacc_csr_free(a)
+ subroutine s_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+
+ return
+ end subroutine s_oacc_csr_free_space
+
+ subroutine s_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_s_csr_sparse_mat%free()
return
@@ -193,7 +199,7 @@ contains
function s_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'CSR_oacc'
+ res = 'CSROA'
end function s_oacc_csr_get_fmt
subroutine s_oacc_csr_all(m, n, nz, a, info)
@@ -202,19 +208,8 @@ contains
class(psb_s_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
- info = 0
- if (allocated(a%val)) then
- !$acc exit data delete(a%val) finalize
- deallocate(a%val, stat=info)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja) finalize
- deallocate(a%ja, stat=info)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp) finalize
- deallocate(a%irp, stat=info)
- end if
+ info = 0
+ call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@@ -274,26 +269,9 @@ contains
subroutine s_oacc_csr_sync_space(a)
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call s_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irp)) then
- call i_oacc_create_dev(a%irp)
- end if
- contains
- subroutine s_oacc_create_dev(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine s_oacc_create_dev
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irp)) call acc_create(a%irp)
end subroutine s_oacc_csr_sync_space
subroutine s_oacc_csr_sync(a)
@@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
- call s_oacc_csr_to_host(a%val)
- call i_oacc_csr_to_host(a%ja)
- call i_oacc_csr_to_host(a%irp)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irp)
else if (a%is_host()) then
- call s_oacc_csr_to_dev(a%val)
- call i_oacc_csr_to_dev(a%ja)
- call i_oacc_csr_to_dev(a%irp)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine s_oacc_csr_sync
- subroutine s_oacc_csr_to_dev(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine s_oacc_csr_to_dev
-
- subroutine s_oacc_csr_to_host(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine s_oacc_csr_to_host
-
- subroutine i_oacc_csr_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_csr_to_dev
-
- subroutine i_oacc_csr_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_csr_to_host
-
end module psb_s_oacc_csr_mat_mod
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
index 9924ba77..793bf353 100644
--- a/openacc/psb_s_oacc_ell_mat_mod.F90
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_s_oacc_ell_mat_mod
use iso_c_binding
+ use openacc
use psb_s_mat_mod
use psb_s_ell_mat_mod
use psb_s_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_s_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => s_oacc_ell_set_dev
procedure, pass(a) :: sync_space => s_oacc_ell_sync_space
procedure, pass(a) :: sync => s_oacc_ell_sync
+ procedure, pass(a) :: free_space => s_oacc_ell_free_space
procedure, pass(a) :: free => s_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
@@ -152,31 +154,32 @@ module psb_s_oacc_ell_mat_mod
contains
- subroutine s_oacc_ell_free(a)
+ subroutine s_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+
+ return
+ end subroutine s_oacc_ell_free_space
+
+ subroutine s_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_s_ell_sparse_mat%free()
return
end subroutine s_oacc_ell_free
-
function s_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
@@ -196,41 +199,12 @@ contains
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call s_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
-
- contains
- subroutine s_oacc_create_dev(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine s_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine s_oacc_ell_sync_space
-
-
function s_oacc_ell_is_host(a) result(res)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
@@ -279,7 +253,7 @@ contains
function s_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'ELL_oacc'
+ res = 'ELLOA'
end function s_oacc_ell_get_fmt
subroutine s_oacc_ell_sync(a)
@@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
- call s_oacc_ell_to_host(a%val)
- call i_oacc_ell_to_host(a%ja)
- call i_oacc_ell_to_host_scalar(a%irn)
- call i_oacc_ell_to_host_scalar(a%idiag)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
else if (a%is_host()) then
- call s_oacc_ell_to_dev(a%val)
- call i_oacc_ell_to_dev(a%ja)
- call i_oacc_ell_to_dev_scalar(a%irn)
- call i_oacc_ell_to_dev_scalar(a%idiag)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine s_oacc_ell_sync
- subroutine s_oacc_ell_to_dev_scalar(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine s_oacc_ell_to_dev_scalar
-
- subroutine s_oacc_ell_to_dev(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine s_oacc_ell_to_dev
-
- subroutine s_oacc_ell_to_host(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine s_oacc_ell_to_host
-
- subroutine s_oacc_ell_to_host_scalar(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine s_oacc_ell_to_host_scalar
-
- subroutine i_oacc_ell_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev
-
- subroutine i_oacc_ell_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev_scalar
-
- subroutine i_oacc_ell_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host
-
- subroutine i_oacc_ell_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host_scalar
end module psb_s_oacc_ell_mat_mod
diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90
index bf8949a1..508591e4 100644
--- a/openacc/psb_s_oacc_hll_mat_mod.F90
+++ b/openacc/psb_s_oacc_hll_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_s_oacc_hll_mat_mod
use iso_c_binding
+ use openacc
use psb_s_mat_mod
use psb_s_hll_mat_mod
use psb_s_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_s_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => s_oacc_hll_set_dev
procedure, pass(a) :: sync_space => s_oacc_hll_sync_space
procedure, pass(a) :: sync => s_oacc_hll_sync
+ procedure, pass(a) :: free_space => s_oacc_hll_free_space
procedure, pass(a) :: free => s_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv
@@ -152,28 +154,28 @@ module psb_s_oacc_hll_mat_mod
contains
- subroutine s_oacc_hll_free(a)
+ subroutine s_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- !$acc exit data delete(a%hkoffs)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+ if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
+
+ return
+ end subroutine s_oacc_hll_free_space
+
+ subroutine s_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_s_hll_sparse_mat%free()
return
@@ -244,48 +246,18 @@ contains
function s_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'HLL_oacc'
+ res = 'HLLOA'
end function s_oacc_hll_get_fmt
subroutine s_oacc_hll_sync_space(a)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call s_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- call i_oacc_create_dev_scalar(a%hkoffs)
- end if
-
- contains
- subroutine s_oacc_create_dev(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine s_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
-
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
+ if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine s_oacc_hll_sync_space
@@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
- call s_oacc_hll_to_host(a%val)
- call i_oacc_hll_to_host(a%ja)
- call i_oacc_hll_to_host_scalar(a%irn)
- call i_oacc_hll_to_host_scalar(a%idiag)
- call i_oacc_hll_to_host_scalar(a%hkoffs)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
+ call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call s_oacc_hll_to_dev(a%val)
- call i_oacc_hll_to_dev(a%ja)
- call i_oacc_hll_to_dev_scalar(a%irn)
- call i_oacc_hll_to_dev_scalar(a%idiag)
- call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
+ call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine s_oacc_hll_sync
- subroutine s_oacc_hll_to_host(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine s_oacc_hll_to_host
-
- subroutine s_oacc_hll_to_dev(v)
- implicit none
- real(psb_spk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine s_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host
-
- subroutine i_oacc_hll_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host_scalar
-
- subroutine i_oacc_hll_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev_scalar
-
-
end module psb_s_oacc_hll_mat_mod
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index c3b31af7..7ce4292f 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -59,7 +59,7 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: asum => s_oacc_asum
procedure, pass(x) :: absval1 => s_oacc_absval1
procedure, pass(x) :: absval2 => s_oacc_absval2
-
+ final :: s_oacc_final_vect_free
end type psb_s_vect_oacc
interface
@@ -164,21 +164,25 @@ contains
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_spk_) :: res
- real(psb_spk_) :: mx
+ real(psb_spk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
- mx = s_oacc_amax(n,x)
- res = s_inner_oacc_nrm2(n, mx, x%v)
+!!$ write(0,*)'oacc_nrm2'
+ res = s_inner_oacc_nrm2(n, x%v)
contains
- function s_inner_oacc_nrm2(n, mx,x) result(res)
+ function s_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
real(psb_spk_) :: x(:)
- real(psb_spk_) :: mx, res
- real(psb_spk_) :: sum
+ real(psb_spk_) :: res
+ real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
- sum = 0.0
+ mx = szero
+ !$acc parallel loop reduction(max:mx)
+ do i = 1, n
+ if (abs(x(i)) > mx) mx = abs(x(i))
+ end do
+ sum = szero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@@ -203,7 +207,7 @@ contains
real(psb_spk_) :: res
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
- max_val = -huge(0.0)
+ max_val = szero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@@ -228,7 +232,7 @@ contains
real(psb_spk_) :: x(:)
real(psb_spk_) :: res
integer(psb_ipk_) :: i
- res = 0.0
+ res = szero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine s_oacc_mlt_a_2
-
-!!$ subroutine s_oacc_mlt_v(x, y, info)
-!!$ implicit none
-!!$ class(psb_s_base_vect_type), intent(inout) :: x
-!!$ class(psb_s_vect_oacc), intent(inout) :: y
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ integer(psb_ipk_) :: i, n
-!!$
-!!$ info = 0
-!!$ n = min(x%get_nrows(), y%get_nrows())
-!!$ select type(xx => x)
-!!$ type is (psb_s_base_vect_type)
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end select
-!!$ end subroutine s_oacc_mlt_v
-!!$
-!!$ subroutine s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
-!!$ use psi_serial_mod
-!!$ use psb_string_mod
-!!$ implicit none
-!!$ real(psb_spk_), intent(in) :: alpha, beta
-!!$ class(psb_s_base_vect_type), intent(inout) :: x
-!!$ class(psb_s_base_vect_type), intent(inout) :: y
-!!$ class(psb_s_vect_oacc), intent(inout) :: z
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ character(len=1), intent(in), optional :: conjgx, conjgy
-!!$ integer(psb_ipk_) :: i, n
-!!$ logical :: conjgx_, conjgy_
-!!$
-!!$ conjgx_ = .false.
-!!$ conjgy_ = .false.
-!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
-!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
-!!$
-!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
-!!$
-!!$ info = 0
-!!$ select type(xx => x)
-!!$ class is (psb_s_vect_oacc)
-!!$ select type (yy => y)
-!!$ class is (psb_s_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync()
-!!$ if (yy%is_host()) call yy%sync()
-!!$ if ((beta /= szero) .and. (z%is_host())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_dev()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ class default
-!!$ if (x%is_dev()) call x%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= szero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ end subroutine s_oacc_mlt_v_2
-
-
subroutine s_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= szero) .and. (y%is_dev())) call y%sync()
- !$acc parallel loop
+
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
- write(0,*)'upd_xyz'
+
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_s_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
+ class is (psb_s_vect_oacc)
+ select type(zz => z)
class is (psb_s_vect_oacc)
- select type(zz => z)
- class is (psb_s_vect_oacc)
- if ((beta /= szero) .and. yy%is_host()) call yy%sync()
- if ((delta /= szero) .and. zz%is_host()) call zz%sync()
- if (xx%is_host()) call xx%sync()
- nx = size(xx%v)
- ny = size(yy%v)
- nz = size(zz%v)
- if ((nx < m) .or. (ny < m) .or. (nz < m)) then
- info = psb_err_internal_error_
- else
- !$acc parallel loop
- do i = 1, m
- yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
- zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
- end do
- end if
- call yy%set_dev()
- call zz%set_dev()
- gpu_done = .true.
- end select
+ if ((beta /= szero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= szero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
end select
+ end select
end select
if (.not. gpu_done) then
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
- if (z%is_host()) call z%sync()
- call y%axpby(m, alpha, x, beta, info)
- call z%axpby(m, gamma, y, delta, info)
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
end if
end subroutine s_oacc_upd_xyz
@@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_s_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
- !$acc update device(x%v)
+
end subroutine s_oacc_ins_a
@@ -687,16 +605,14 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
-
+ call x%sync_space()
+
end subroutine s_oacc_bld_mn
@@ -707,6 +623,7 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
-
x%v(:) = this(:)
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
+ call x%sync_space()
end subroutine s_oacc_bld_x
@@ -848,54 +761,21 @@ contains
end function s_oacc_dot_a
- ! subroutine s_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_s_vect_oacc), intent(inout) :: x
- ! real(psb_spk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine s_oacc_set_vect
-
- subroutine s_oacc_to_dev(v)
- implicit none
- real(psb_spk_) :: v(:)
- !$acc update device(v)
- end subroutine s_oacc_to_dev
-
- subroutine s_oacc_to_host(v)
- implicit none
- real(psb_spk_) :: v(:)
- !$acc update self(v)
- end subroutine s_oacc_to_host
subroutine s_oacc_sync_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) then
- if (.not.acc_is_present(x%v)) call s_oacc_create_dev(x%v)
- end if
- contains
- subroutine s_oacc_create_dev(v)
- implicit none
- real(psb_spk_) :: v(:)
- !$acc enter data copyin(v)
- end subroutine s_oacc_create_dev
+ if (allocated(x%v)) call acc_create(x%v)
end subroutine s_oacc_sync_space
subroutine s_oacc_sync(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call s_oacc_to_host(x%v)
+ call acc_update_self(x%v)
end if
if (x%is_host()) then
- call s_oacc_to_dev(x%v)
+ call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine s_oacc_sync
@@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 's_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
+ call x%set_host()
+ call x%sync_space()
end subroutine s_oacc_vect_all
+ subroutine s_oacc_final_vect_free(x)
+ implicit none
+ type(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+ info = 0
+ if (allocated(x%v)) then
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine s_oacc_final_vect_free
+
subroutine s_oacc_vect_free(x, info)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
-
end subroutine s_oacc_vect_free
function s_oacc_get_size(x) result(res)
diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90
index 7842d96c..dbafb391 100644
--- a/openacc/psb_z_oacc_csr_mat_mod.F90
+++ b/openacc/psb_z_oacc_csr_mat_mod.F90
@@ -1,6 +1,7 @@
module psb_z_oacc_csr_mat_mod
use iso_c_binding
+ use openacc
use psb_z_mat_mod
use psb_z_oacc_vect_mod
!use oaccsparse_mod
@@ -35,6 +36,7 @@ module psb_z_oacc_csr_mat_mod
procedure, pass(a) :: set_host => z_oacc_csr_set_host
procedure, pass(a) :: set_sync => z_oacc_csr_set_sync
procedure, pass(a) :: set_dev => z_oacc_csr_set_dev
+ procedure, pass(a) :: free_space => z_oacc_csr_free_space
procedure, pass(a) :: sync_space => z_oacc_csr_sync_space
procedure, pass(a) :: sync => z_oacc_csr_sync
end type psb_z_oacc_csr_sparse_mat
@@ -154,22 +156,26 @@ module psb_z_oacc_csr_mat_mod
contains
- subroutine z_oacc_csr_free(a)
+ subroutine z_oacc_csr_free_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+
+ return
+ end subroutine z_oacc_csr_free_space
+
+ subroutine z_oacc_csr_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_z_csr_sparse_mat%free()
return
@@ -193,7 +199,7 @@ contains
function z_oacc_csr_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'CSR_oacc'
+ res = 'CSROA'
end function z_oacc_csr_get_fmt
subroutine z_oacc_csr_all(m, n, nz, a, info)
@@ -202,19 +208,8 @@ contains
class(psb_z_oacc_csr_sparse_mat), intent(out) :: a
integer(psb_ipk_), intent(out) :: info
- info = 0
- if (allocated(a%val)) then
- !$acc exit data delete(a%val) finalize
- deallocate(a%val, stat=info)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja) finalize
- deallocate(a%ja, stat=info)
- end if
- if (allocated(a%irp)) then
- !$acc exit data delete(a%irp) finalize
- deallocate(a%irp, stat=info)
- end if
+ info = 0
+ call a%free()
call a%set_nrows(m)
call a%set_ncols(n)
@@ -274,26 +269,9 @@ contains
subroutine z_oacc_csr_sync_space(a)
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call z_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irp)) then
- call i_oacc_create_dev(a%irp)
- end if
- contains
- subroutine z_oacc_create_dev(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine z_oacc_create_dev
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irp)) call acc_create(a%irp)
end subroutine z_oacc_csr_sync_space
subroutine z_oacc_csr_sync(a)
@@ -304,40 +282,16 @@ contains
tmpa => a
if (a%is_dev()) then
- call z_oacc_csr_to_host(a%val)
- call i_oacc_csr_to_host(a%ja)
- call i_oacc_csr_to_host(a%irp)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irp)
else if (a%is_host()) then
- call z_oacc_csr_to_dev(a%val)
- call i_oacc_csr_to_dev(a%ja)
- call i_oacc_csr_to_dev(a%irp)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine z_oacc_csr_sync
- subroutine z_oacc_csr_to_dev(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine z_oacc_csr_to_dev
-
- subroutine z_oacc_csr_to_host(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine z_oacc_csr_to_host
-
- subroutine i_oacc_csr_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_csr_to_dev
-
- subroutine i_oacc_csr_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_csr_to_host
-
end module psb_z_oacc_csr_mat_mod
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
index eb8884d6..76b0182d 100644
--- a/openacc/psb_z_oacc_ell_mat_mod.F90
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_z_oacc_ell_mat_mod
use iso_c_binding
+ use openacc
use psb_z_mat_mod
use psb_z_ell_mat_mod
use psb_z_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_z_oacc_ell_mat_mod
procedure, pass(a) :: set_dev => z_oacc_ell_set_dev
procedure, pass(a) :: sync_space => z_oacc_ell_sync_space
procedure, pass(a) :: sync => z_oacc_ell_sync
+ procedure, pass(a) :: free_space => z_oacc_ell_free_space
procedure, pass(a) :: free => z_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv
@@ -152,31 +154,32 @@ module psb_z_oacc_ell_mat_mod
contains
- subroutine z_oacc_ell_free(a)
+ subroutine z_oacc_ell_free_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+
+ return
+ end subroutine z_oacc_ell_free_space
+
+ subroutine z_oacc_ell_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_z_ell_sparse_mat%free()
return
end subroutine z_oacc_ell_free
-
function z_oacc_ell_sizeof(a) result(res)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
@@ -196,41 +199,12 @@ contains
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call z_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
-
- contains
- subroutine z_oacc_create_dev(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine z_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
end subroutine z_oacc_ell_sync_space
-
-
function z_oacc_ell_is_host(a) result(res)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
@@ -279,7 +253,7 @@ contains
function z_oacc_ell_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'ELL_oacc'
+ res = 'ELLOA'
end function z_oacc_ell_get_fmt
subroutine z_oacc_ell_sync(a)
@@ -290,64 +264,17 @@ contains
tmpa => a
if (a%is_dev()) then
- call z_oacc_ell_to_host(a%val)
- call i_oacc_ell_to_host(a%ja)
- call i_oacc_ell_to_host_scalar(a%irn)
- call i_oacc_ell_to_host_scalar(a%idiag)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
else if (a%is_host()) then
- call z_oacc_ell_to_dev(a%val)
- call i_oacc_ell_to_dev(a%ja)
- call i_oacc_ell_to_dev_scalar(a%irn)
- call i_oacc_ell_to_dev_scalar(a%idiag)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine z_oacc_ell_sync
- subroutine z_oacc_ell_to_dev_scalar(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine z_oacc_ell_to_dev_scalar
-
- subroutine z_oacc_ell_to_dev(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine z_oacc_ell_to_dev
-
- subroutine z_oacc_ell_to_host(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine z_oacc_ell_to_host
-
- subroutine z_oacc_ell_to_host_scalar(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine z_oacc_ell_to_host_scalar
-
- subroutine i_oacc_ell_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev
-
- subroutine i_oacc_ell_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_ell_to_dev_scalar
-
- subroutine i_oacc_ell_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:,:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host
-
- subroutine i_oacc_ell_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_ell_to_host_scalar
end module psb_z_oacc_ell_mat_mod
diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90
index e6a4929a..4a657fd1 100644
--- a/openacc/psb_z_oacc_hll_mat_mod.F90
+++ b/openacc/psb_z_oacc_hll_mat_mod.F90
@@ -1,5 +1,6 @@
module psb_z_oacc_hll_mat_mod
use iso_c_binding
+ use openacc
use psb_z_mat_mod
use psb_z_hll_mat_mod
use psb_z_oacc_vect_mod
@@ -21,6 +22,7 @@ module psb_z_oacc_hll_mat_mod
procedure, pass(a) :: set_dev => z_oacc_hll_set_dev
procedure, pass(a) :: sync_space => z_oacc_hll_sync_space
procedure, pass(a) :: sync => z_oacc_hll_sync
+ procedure, pass(a) :: free_space => z_oacc_hll_free_space
procedure, pass(a) :: free => z_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv
@@ -152,28 +154,28 @@ module psb_z_oacc_hll_mat_mod
contains
- subroutine z_oacc_hll_free(a)
+ subroutine z_oacc_hll_free_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) then
- !$acc exit data delete(a%val)
- end if
- if (allocated(a%ja)) then
- !$acc exit data delete(a%ja)
- end if
- if (allocated(a%irn)) then
- !$acc exit data delete(a%irn)
- end if
- if (allocated(a%idiag)) then
- !$acc exit data delete(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- !$acc exit data delete(a%hkoffs)
- end if
+ if (allocated(a%val)) call acc_delete_finalize(a%val)
+ if (allocated(a%ja)) call acc_delete_finalize(a%ja)
+ if (allocated(a%irn)) call acc_delete_finalize(a%irn)
+ if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
+ if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
+
+ return
+ end subroutine z_oacc_hll_free_space
+
+ subroutine z_oacc_hll_free(a)
+ use psb_base_mod
+ implicit none
+ class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
+ integer(psb_ipk_) :: info
+ call a%free_space()
call a%psb_z_hll_sparse_mat%free()
return
@@ -244,48 +246,18 @@ contains
function z_oacc_hll_get_fmt() result(res)
implicit none
character(len=5) :: res
- res = 'HLL_oacc'
+ res = 'HLLOA'
end function z_oacc_hll_get_fmt
subroutine z_oacc_hll_sync_space(a)
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) then
- call z_oacc_create_dev(a%val)
- end if
- if (allocated(a%ja)) then
- call i_oacc_create_dev(a%ja)
- end if
- if (allocated(a%irn)) then
- call i_oacc_create_dev_scalar(a%irn)
- end if
- if (allocated(a%idiag)) then
- call i_oacc_create_dev_scalar(a%idiag)
- end if
- if (allocated(a%hkoffs)) then
- call i_oacc_create_dev_scalar(a%hkoffs)
- end if
-
- contains
- subroutine z_oacc_create_dev(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine z_oacc_create_dev
-
- subroutine i_oacc_create_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev
-
- subroutine i_oacc_create_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc enter data copyin(v)
- end subroutine i_oacc_create_dev_scalar
-
+ if (allocated(a%val)) call acc_create(a%val)
+ if (allocated(a%ja)) call acc_create(a%ja)
+ if (allocated(a%irn)) call acc_create(a%irn)
+ if (allocated(a%idiag)) call acc_create(a%idiag)
+ if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
end subroutine z_oacc_hll_sync_space
@@ -297,56 +269,19 @@ contains
tmpa => a
if (a%is_dev()) then
- call z_oacc_hll_to_host(a%val)
- call i_oacc_hll_to_host(a%ja)
- call i_oacc_hll_to_host_scalar(a%irn)
- call i_oacc_hll_to_host_scalar(a%idiag)
- call i_oacc_hll_to_host_scalar(a%hkoffs)
+ call acc_update_self(a%val)
+ call acc_update_self(a%ja)
+ call acc_update_self(a%irn)
+ call acc_update_self(a%idiag)
+ call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call z_oacc_hll_to_dev(a%val)
- call i_oacc_hll_to_dev(a%ja)
- call i_oacc_hll_to_dev_scalar(a%irn)
- call i_oacc_hll_to_dev_scalar(a%idiag)
- call i_oacc_hll_to_dev_scalar(a%hkoffs)
+ call acc_update_device(a%val)
+ call acc_update_device(a%ja)
+ call acc_update_device(a%irn)
+ call acc_update_device(a%idiag)
+ call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine z_oacc_hll_sync
- subroutine z_oacc_hll_to_host(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine z_oacc_hll_to_host
-
- subroutine z_oacc_hll_to_dev(v)
- implicit none
- complex(psb_dpk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine z_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host
-
- subroutine i_oacc_hll_to_dev(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev
-
- subroutine i_oacc_hll_to_host_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update self(v)
- end subroutine i_oacc_hll_to_host_scalar
-
- subroutine i_oacc_hll_to_dev_scalar(v)
- implicit none
- integer(psb_ipk_), intent(in) :: v(:)
- !$acc update device(v)
- end subroutine i_oacc_hll_to_dev_scalar
-
-
end module psb_z_oacc_hll_mat_mod
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index bab1a0a0..5d6f07be 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -59,7 +59,7 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: asum => z_oacc_asum
procedure, pass(x) :: absval1 => z_oacc_absval1
procedure, pass(x) :: absval2 => z_oacc_absval2
-
+ final :: z_oacc_final_vect_free
end type psb_z_vect_oacc
interface
@@ -164,21 +164,25 @@ contains
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
- real(psb_dpk_) :: res
- real(psb_dpk_) :: mx
+ real(psb_dpk_) :: res
integer(psb_ipk_) :: info
if (x%is_host()) call x%sync()
- mx = z_oacc_amax(n,x)
- res = z_inner_oacc_nrm2(n, mx, x%v)
+!!$ write(0,*)'oacc_nrm2'
+ res = z_inner_oacc_nrm2(n, x%v)
contains
- function z_inner_oacc_nrm2(n, mx,x) result(res)
+ function z_inner_oacc_nrm2(n, x) result(res)
integer(psb_ipk_) :: n
complex(psb_dpk_) :: x(:)
- real(psb_dpk_) :: mx, res
- real(psb_dpk_) :: sum
+ real(psb_dpk_) :: res
+ real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
- sum = 0.0
+ mx = dzero
+ !$acc parallel loop reduction(max:mx)
+ do i = 1, n
+ if (abs(x(i)) > mx) mx = abs(x(i))
+ end do
+ sum = dzero
!$acc parallel loop reduction(+:sum)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
@@ -203,7 +207,7 @@ contains
real(psb_dpk_) :: res
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
- max_val = -huge(0.0)
+ max_val = dzero
!$acc parallel loop reduction(max:max_val)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
@@ -228,7 +232,7 @@ contains
complex(psb_dpk_) :: x(:)
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
- res = 0.0
+ res = dzero
!$acc parallel loop reduction(+:res)
do i = 1, n
res = res + abs(x(i))
@@ -271,92 +275,6 @@ contains
call z%set_host()
end subroutine z_oacc_mlt_a_2
-
-!!$ subroutine z_oacc_mlt_v(x, y, info)
-!!$ implicit none
-!!$ class(psb_z_base_vect_type), intent(inout) :: x
-!!$ class(psb_z_vect_oacc), intent(inout) :: y
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$
-!!$ integer(psb_ipk_) :: i, n
-!!$
-!!$ info = 0
-!!$ n = min(x%get_nrows(), y%get_nrows())
-!!$ select type(xx => x)
-!!$ type is (psb_z_base_vect_type)
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ y%v(i) = y%v(i) * xx%v(i)
-!!$ end do
-!!$ call y%set_host()
-!!$ end select
-!!$ end subroutine z_oacc_mlt_v
-!!$
-!!$ subroutine z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
-!!$ use psi_serial_mod
-!!$ use psb_string_mod
-!!$ implicit none
-!!$ complex(psb_dpk_), intent(in) :: alpha, beta
-!!$ class(psb_z_base_vect_type), intent(inout) :: x
-!!$ class(psb_z_base_vect_type), intent(inout) :: y
-!!$ class(psb_z_vect_oacc), intent(inout) :: z
-!!$ integer(psb_ipk_), intent(out) :: info
-!!$ character(len=1), intent(in), optional :: conjgx, conjgy
-!!$ integer(psb_ipk_) :: i, n
-!!$ logical :: conjgx_, conjgy_
-!!$
-!!$ conjgx_ = .false.
-!!$ conjgy_ = .false.
-!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
-!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
-!!$
-!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
-!!$
-!!$ info = 0
-!!$ select type(xx => x)
-!!$ class is (psb_z_vect_oacc)
-!!$ select type (yy => y)
-!!$ class is (psb_z_vect_oacc)
-!!$ if (xx%is_host()) call xx%sync()
-!!$ if (yy%is_host()) call yy%sync()
-!!$ if ((beta /= zzero) .and. (z%is_host())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_dev()
-!!$ class default
-!!$ if (xx%is_dev()) call xx%sync()
-!!$ if (yy%is_dev()) call yy%sync()
-!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ class default
-!!$ if (x%is_dev()) call x%sync()
-!!$ if (y%is_dev()) call y%sync()
-!!$ if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
-!!$ !$acc parallel loop
-!!$ do i = 1, n
-!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
-!!$ end do
-!!$ call z%set_host()
-!!$ end select
-!!$ end subroutine z_oacc_mlt_v_2
-
-
subroutine z_oacc_axpby_v(m, alpha, x, beta, y, info)
!use psi_serial_mod
implicit none
@@ -414,7 +332,7 @@ contains
integer(psb_ipk_) :: i
if ((beta /= zzero) .and. (y%is_dev())) call y%sync()
- !$acc parallel loop
+
do i = 1, m
y%v(i) = alpha * x(i) + beta * y%v(i)
end do
@@ -432,44 +350,44 @@ contains
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: nx, ny, nz, i
logical :: gpu_done
- write(0,*)'upd_xyz'
+
info = psb_success_
gpu_done = .false.
select type(xx => x)
class is (psb_z_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
+ class is (psb_z_vect_oacc)
+ select type(zz => z)
class is (psb_z_vect_oacc)
- select type(zz => z)
- class is (psb_z_vect_oacc)
- if ((beta /= zzero) .and. yy%is_host()) call yy%sync()
- if ((delta /= zzero) .and. zz%is_host()) call zz%sync()
- if (xx%is_host()) call xx%sync()
- nx = size(xx%v)
- ny = size(yy%v)
- nz = size(zz%v)
- if ((nx < m) .or. (ny < m) .or. (nz < m)) then
- info = psb_err_internal_error_
- else
- !$acc parallel loop
- do i = 1, m
- yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
- zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
- end do
- end if
- call yy%set_dev()
- call zz%set_dev()
- gpu_done = .true.
- end select
+ if ((beta /= zzero) .and. yy%is_host()) call yy%sync()
+ if ((delta /= zzero) .and. zz%is_host()) call zz%sync()
+ if (xx%is_host()) call xx%sync()
+ nx = size(xx%v)
+ ny = size(yy%v)
+ nz = size(zz%v)
+ if ((nx < m) .or. (ny < m) .or. (nz < m)) then
+ info = psb_err_internal_error_
+ else
+ !$acc parallel loop
+ do i = 1, m
+ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
+ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
+ end do
+ end if
+ call yy%set_dev()
+ call zz%set_dev()
+ gpu_done = .true.
end select
+ end select
end select
if (.not. gpu_done) then
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
- if (z%is_host()) call z%sync()
- call y%axpby(m, alpha, x, beta, info)
- call z%axpby(m, gamma, y, delta, info)
+ if (x%is_host()) call x%sync()
+ if (y%is_host()) call y%sync()
+ if (z%is_host()) call z%sync()
+ call y%axpby(m, alpha, x, beta, info)
+ call z%axpby(m, gamma, y, delta, info)
end if
end subroutine z_oacc_upd_xyz
@@ -676,7 +594,7 @@ contains
if (x%is_dev()) call x%sync()
call x%psb_z_base_vect_type%ins(n, irl, val, dupl, info)
call x%set_host()
- !$acc update device(x%v)
+
end subroutine z_oacc_ins_a
@@ -687,16 +605,14 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
-
+ call x%sync_space()
+
end subroutine z_oacc_bld_mn
@@ -707,6 +623,7 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
+ call x%free(info)
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
@@ -714,13 +631,9 @@ contains
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
-
x%v(:) = this(:)
call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data copyin(x%v)
+ call x%sync_space()
end subroutine z_oacc_bld_x
@@ -848,54 +761,21 @@ contains
end function z_oacc_dot_a
- ! subroutine z_oacc_set_vect(x,y)
- ! implicit none
- ! class(psb_z_vect_oacc), intent(inout) :: x
- ! complex(psb_dpk_), intent(in) :: y(:)
- ! integer(psb_ipk_) :: info
-
- ! if (size(x%v) /= size(y)) then
- ! call x%free(info)
- ! call x%all(size(y),info)
- ! end if
- ! x%v(:) = y(:)
- ! call x%set_host()
- ! end subroutine z_oacc_set_vect
-
- subroutine z_oacc_to_dev(v)
- implicit none
- complex(psb_dpk_) :: v(:)
- !$acc update device(v)
- end subroutine z_oacc_to_dev
-
- subroutine z_oacc_to_host(v)
- implicit none
- complex(psb_dpk_) :: v(:)
- !$acc update self(v)
- end subroutine z_oacc_to_host
subroutine z_oacc_sync_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) then
- if (.not.acc_is_present(x%v)) call z_oacc_create_dev(x%v)
- end if
- contains
- subroutine z_oacc_create_dev(v)
- implicit none
- complex(psb_dpk_) :: v(:)
- !$acc enter data copyin(v)
- end subroutine z_oacc_create_dev
+ if (allocated(x%v)) call acc_create(x%v)
end subroutine z_oacc_sync_space
subroutine z_oacc_sync(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call z_oacc_to_host(x%v)
+ call acc_update_self(x%v)
end if
if (x%is_host()) then
- call z_oacc_to_dev(x%v)
+ call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine z_oacc_sync
@@ -954,33 +834,36 @@ contains
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
- if (info == 0) then
- call x%set_host()
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
- !$acc enter data create(x%v)
- call x%sync_space()
- end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'z_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
+ call x%set_host()
+ call x%sync_space()
end subroutine z_oacc_vect_all
+ subroutine z_oacc_final_vect_free(x)
+ implicit none
+ type(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_) :: info
+ info = 0
+ if (allocated(x%v)) then
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ deallocate(x%v, stat=info)
+ end if
+
+ end subroutine z_oacc_final_vect_free
+
subroutine z_oacc_vect_free(x, info)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
if (allocated(x%v)) then
- if (acc_is_present(x%v)) then
- !$acc exit data delete(x%v) finalize
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
end if
-
end subroutine z_oacc_vect_free
function z_oacc_get_size(x) result(res)
From fbb974fb8b6e71cd7970e2101b0adf6cca32a724 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 30 Aug 2024 10:20:58 +0200
Subject: [PATCH 35/86] Change name sync|free space, unify allocate impl
---
openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 | 10 +-
openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 | 4 +-
openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 | 4 +-
openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 | 16 +--
openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 | 4 +-
openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 | 21 +--
openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 | 4 +-
openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 | 4 +-
openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 | 4 +-
openacc/impl/psb_c_oacc_mlt_v.f90 | 4 +-
openacc/impl/psb_c_oacc_mlt_v_2.f90 | 6 +-
openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 10 +-
openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 4 +-
openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 4 +-
openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 16 +--
openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 | 4 +-
openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 21 +--
openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 4 +-
openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 4 +-
openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 | 4 +-
openacc/impl/psb_d_oacc_mlt_v.f90 | 4 +-
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 6 +-
openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 | 10 +-
openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 | 4 +-
openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 | 4 +-
openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 | 16 +--
openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 | 4 +-
openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 | 21 +--
openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 | 4 +-
openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 | 4 +-
openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 | 4 +-
openacc/impl/psb_s_oacc_mlt_v.f90 | 4 +-
openacc/impl/psb_s_oacc_mlt_v_2.f90 | 6 +-
openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 | 10 +-
openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 | 4 +-
openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 | 4 +-
openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 | 16 +--
openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 | 4 +-
openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 | 21 +--
openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 | 4 +-
openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 | 4 +-
openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 | 4 +-
openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 | 4 +-
openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 | 4 +-
openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 | 4 +-
openacc/impl/psb_z_oacc_mlt_v.f90 | 4 +-
openacc/impl/psb_z_oacc_mlt_v_2.f90 | 6 +-
openacc/psb_c_oacc_csr_mat_mod.F90 | 66 ++++-----
openacc/psb_c_oacc_ell_mat_mod.F90 | 62 ++++-----
openacc/psb_c_oacc_hll_mat_mod.F90 | 62 ++++-----
openacc/psb_c_oacc_vect_mod.F90 | 126 +++++++++---------
openacc/psb_d_oacc_csr_mat_mod.F90 | 66 ++++-----
openacc/psb_d_oacc_ell_mat_mod.F90 | 62 ++++-----
openacc/psb_d_oacc_hll_mat_mod.F90 | 62 ++++-----
openacc/psb_d_oacc_vect_mod.F90 | 126 +++++++++---------
openacc/psb_i_oacc_vect_mod.F90 | 82 ++++++------
openacc/psb_l_oacc_vect_mod.F90 | 82 ++++++------
openacc/psb_s_oacc_csr_mat_mod.F90 | 66 ++++-----
openacc/psb_s_oacc_ell_mat_mod.F90 | 62 ++++-----
openacc/psb_s_oacc_hll_mat_mod.F90 | 62 ++++-----
openacc/psb_s_oacc_vect_mod.F90 | 126 +++++++++---------
openacc/psb_z_oacc_csr_mat_mod.F90 | 66 ++++-----
openacc/psb_z_oacc_ell_mat_mod.F90 | 62 ++++-----
openacc/psb_z_oacc_hll_mat_mod.F90 | 62 ++++-----
openacc/psb_z_oacc_vect_mod.F90 | 126 +++++++++---------
110 files changed, 902 insertions(+), 1042 deletions(-)
diff --git a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
index 09cdc228..a6fe9b9c 100644
--- a/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
+++ b/openacc/impl/psb_c_oacc_csr_allocate_mnnz.F90
@@ -15,14 +15,8 @@ contains
info = psb_success_
call a%psb_c_csr_sparse_mat%allocate(m, n, nz)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz))
- allocate(a%ja(nz))
- allocate(a%irp(m+1))
- end if
-
- call a%set_dev()
+ call a%set_host()
+ call a%sync_dev_space()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
index a411cc6a..ab5f514b 100644
--- a/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
index a8cd93a0..c6287cbb 100644
--- a/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
index 1dee9f2e..833140f4 100644
--- a/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_c_oacc_csr_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_c_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_c_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
index 30691030..73f02b84 100644
--- a/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
index b37011c0..853a3269 100644
--- a/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90
index 92a53116..e7dc970a 100644
--- a/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90
+++ b/openacc/impl/psb_c_oacc_csr_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_c_csr_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90
index 594f3e88..5a363434 100644
--- a/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90
+++ b/openacc/impl/psb_c_oacc_ell_allocate_mnnz.F90
@@ -21,20 +21,8 @@ contains
end if
call a%psb_c_ell_sparse_mat%allocate(m, n, nz_)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(m, nz_))
- allocate(a%ja(m, nz_))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- end if
-
- a%val = czero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
index 836874fe..47118e48 100644
--- a/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_ell_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
index 31d6c4b4..559cb65c 100644
--- a/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90
index df642ee8..cfabcb78 100644
--- a/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90
+++ b/openacc/impl/psb_c_oacc_ell_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_c_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_c_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
index 1ca43435..183be9a5 100644
--- a/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_ell_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
index 95798429..d1bd6330 100644
--- a/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90
index 9f21c5df..24d153f6 100644
--- a/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90
+++ b/openacc/impl/psb_c_oacc_ell_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_c_ell_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90
index 0840d0d6..c90fc652 100644
--- a/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90
+++ b/openacc/impl/psb_c_oacc_hll_allocate_mnnz.F90
@@ -22,25 +22,8 @@ contains
end if
call a%psb_c_hll_sparse_mat%allocate(m, n, nz_)
-
- hksz = a%hksz
- nhacks = (m + hksz - 1) / hksz
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_ * m))
- allocate(a%ja(nz_ * m))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- allocate(a%hkoffs(nhacks))
- end if
-
- a%val = czero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
- a%hkoffs = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
index 32391cc8..0eaebf9d 100644
--- a/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_hll_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
index e442b668..29b18470 100644
--- a/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_c_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90
index a8d486b2..4b45f518 100644
--- a/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90
+++ b/openacc/impl/psb_c_oacc_hll_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_c_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_c_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
index 30d723fe..193f9753 100644
--- a/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_c_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
index 0ac69af8..1c928067 100644
--- a/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_c_oacc_hll_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_c_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90
index 52983d4e..9290c381 100644
--- a/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90
+++ b/openacc/impl/psb_c_oacc_hll_reallocate_nz.F90
@@ -15,8 +15,8 @@ contains
info = psb_success_
call a%psb_c_hll_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_c_oacc_mlt_v.f90 b/openacc/impl/psb_c_oacc_mlt_v.f90
index a366543a..e5f215be 100644
--- a/openacc/impl/psb_c_oacc_mlt_v.f90
+++ b/openacc/impl/psb_c_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
subroutine psb_c_oacc_mlt_v(x, y, info)
- use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v
+ use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v
implicit none
class(psb_c_base_vect_type), intent(inout) :: x
@@ -13,7 +13,7 @@ subroutine psb_c_oacc_mlt_v(x, y, info)
n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
+ select type(xx => x)
class is (psb_c_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90
index f7bceae7..b47cd1ae 100644
--- a/openacc/impl/psb_c_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2
+ use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2
use psb_string_mod
implicit none
complex(psb_spk_), intent(in) :: alpha, beta
@@ -18,9 +18,9 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
- select type(xx => x)
+ select type(xx => x)
class is (psb_c_vect_oacc)
- select type (yy => y)
+ select type (yy => y)
class is (psb_c_vect_oacc)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
index 5281edee..1f210a09 100644
--- a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
+++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90
@@ -15,14 +15,8 @@ contains
info = psb_success_
call a%psb_d_csr_sparse_mat%allocate(m, n, nz)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz))
- allocate(a%ja(nz))
- allocate(a%irp(m+1))
- end if
-
- call a%set_dev()
+ call a%set_host()
+ call a%sync_dev_space()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
index 50da7692..92770e0f 100644
--- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
index 0d35e247..9da6d861 100644
--- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
index 5f0ef4a0..c9a875bc 100644
--- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_d_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_d_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
index 97fa07d1..0f020e06 100644
--- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
index e992f41a..cfd7d7dd 100644
--- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
index c345f681..8f746105 100644
--- a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
+++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_d_csr_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
index b46c5454..ca0a0f84 100644
--- a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
+++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90
@@ -21,20 +21,8 @@ contains
end if
call a%psb_d_ell_sparse_mat%allocate(m, n, nz_)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(m, nz_))
- allocate(a%ja(m, nz_))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- end if
-
- a%val = dzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
index 6c24098e..384701ce 100644
--- a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
index 991681e9..6622a642 100644
--- a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
index 2c5563ce..466594fb 100644
--- a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
+++ b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_d_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_d_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
index 9214ba3f..7bddced9 100644
--- a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
index 292165fc..53e45b98 100644
--- a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
index 11332472..130a931e 100644
--- a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
+++ b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_d_ell_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
index 47a6933b..a30a0b00 100644
--- a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
+++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90
@@ -22,25 +22,8 @@ contains
end if
call a%psb_d_hll_sparse_mat%allocate(m, n, nz_)
-
- hksz = a%hksz
- nhacks = (m + hksz - 1) / hksz
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_ * m))
- allocate(a%ja(nz_ * m))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- allocate(a%hkoffs(nhacks))
- end if
-
- a%val = dzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
- a%hkoffs = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
index e39a29b7..34a31b0b 100644
--- a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
index a838e31e..7d14f65c 100644
--- a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_d_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
index ae1c3c94..ff6a4580 100644
--- a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
+++ b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_d_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_d_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
index 29494a39..0f0ce6f1 100644
--- a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
index e03e2f30..1fcfa4f6 100644
--- a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_d_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
index 412409d1..21f5c48a 100644
--- a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
+++ b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90
@@ -15,8 +15,8 @@ contains
info = psb_success_
call a%psb_d_hll_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90
index dac62a65..8b3a05b1 100644
--- a/openacc/impl/psb_d_oacc_mlt_v.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
subroutine psb_d_oacc_mlt_v(x, y, info)
- use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v
+ use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
@@ -13,7 +13,7 @@ subroutine psb_d_oacc_mlt_v(x, y, info)
n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
+ select type(xx => x)
class is (psb_d_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
index 3f3a457d..ce460924 100644
--- a/openacc/impl/psb_d_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2
+ use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2
use psb_string_mod
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
@@ -18,9 +18,9 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
- select type(xx => x)
+ select type(xx => x)
class is (psb_d_vect_oacc)
- select type (yy => y)
+ select type (yy => y)
class is (psb_d_vect_oacc)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
diff --git a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
index 08c51bce..0d531129 100644
--- a/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
+++ b/openacc/impl/psb_s_oacc_csr_allocate_mnnz.F90
@@ -15,14 +15,8 @@ contains
info = psb_success_
call a%psb_s_csr_sparse_mat%allocate(m, n, nz)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz))
- allocate(a%ja(nz))
- allocate(a%irp(m+1))
- end if
-
- call a%set_dev()
+ call a%set_host()
+ call a%sync_dev_space()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
index 3c2fb1cb..f740cf18 100644
--- a/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
index e47959f8..fc495668 100644
--- a/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
index ec7f4bad..b1785b49 100644
--- a/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_s_oacc_csr_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_s_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_s_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
index fdbf3a0c..0c92e476 100644
--- a/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
index a7a581b8..1c61eb3b 100644
--- a/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
index 77c17120..e49bf2c8 100644
--- a/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
+++ b/openacc/impl/psb_s_oacc_csr_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_s_csr_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
index 38c19b78..b9c25654 100644
--- a/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
+++ b/openacc/impl/psb_s_oacc_ell_allocate_mnnz.F90
@@ -21,20 +21,8 @@ contains
end if
call a%psb_s_ell_sparse_mat%allocate(m, n, nz_)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(m, nz_))
- allocate(a%ja(m, nz_))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- end if
-
- a%val = szero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
index 0f6fbc48..5a607370 100644
--- a/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
index 793c2779..47ce5241 100644
--- a/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90
index ba42af12..5fc672d0 100644
--- a/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90
+++ b/openacc/impl/psb_s_oacc_ell_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_s_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_s_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
index ba82049f..736d4253 100644
--- a/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_ell_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
index df789664..d8d13aa2 100644
--- a/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90
index 373c2b67..34036cdb 100644
--- a/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90
+++ b/openacc/impl/psb_s_oacc_ell_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_s_ell_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90
index c67ea621..1db3e55d 100644
--- a/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90
+++ b/openacc/impl/psb_s_oacc_hll_allocate_mnnz.F90
@@ -22,25 +22,8 @@ contains
end if
call a%psb_s_hll_sparse_mat%allocate(m, n, nz_)
-
- hksz = a%hksz
- nhacks = (m + hksz - 1) / hksz
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_ * m))
- allocate(a%ja(nz_ * m))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- allocate(a%hkoffs(nhacks))
- end if
-
- a%val = szero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
- a%hkoffs = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
index dfba3c6c..ca4afeb7 100644
--- a/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_hll_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
index 849e03b7..47a41b8d 100644
--- a/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_s_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90
index 900b8982..ea81574b 100644
--- a/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90
+++ b/openacc/impl/psb_s_oacc_hll_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_s_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_s_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_s_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
index c22818fa..c26c5018 100644
--- a/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_s_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
index 992b1c7b..dd04bbab 100644
--- a/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_s_oacc_hll_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_s_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90
index 7768d1ed..242ffe0d 100644
--- a/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90
+++ b/openacc/impl/psb_s_oacc_hll_reallocate_nz.F90
@@ -15,8 +15,8 @@ contains
info = psb_success_
call a%psb_s_hll_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_s_oacc_mlt_v.f90 b/openacc/impl/psb_s_oacc_mlt_v.f90
index 61a1d152..11dcac6c 100644
--- a/openacc/impl/psb_s_oacc_mlt_v.f90
+++ b/openacc/impl/psb_s_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
subroutine psb_s_oacc_mlt_v(x, y, info)
- use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v
+ use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v
implicit none
class(psb_s_base_vect_type), intent(inout) :: x
@@ -13,7 +13,7 @@ subroutine psb_s_oacc_mlt_v(x, y, info)
n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
+ select type(xx => x)
class is (psb_s_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90
index bcaebfbe..2ce7fe53 100644
--- a/openacc/impl/psb_s_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2
+ use psb_s_oacc_vect_mod, psb_protect_name => psb_s_oacc_mlt_v_2
use psb_string_mod
implicit none
real(psb_spk_), intent(in) :: alpha, beta
@@ -18,9 +18,9 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
- select type(xx => x)
+ select type(xx => x)
class is (psb_s_vect_oacc)
- select type (yy => y)
+ select type (yy => y)
class is (psb_s_vect_oacc)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
diff --git a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90
index fd19d6f9..b7ba4bad 100644
--- a/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90
+++ b/openacc/impl/psb_z_oacc_csr_allocate_mnnz.F90
@@ -15,14 +15,8 @@ contains
info = psb_success_
call a%psb_z_csr_sparse_mat%allocate(m, n, nz)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz))
- allocate(a%ja(nz))
- allocate(a%irp(m+1))
- end if
-
- call a%set_dev()
+ call a%set_host()
+ call a%sync_dev_space()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
index 6c40d0d2..ef1b5b73 100644
--- a/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_csr_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_csr_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
index 3025fde2..7c3470a2 100644
--- a/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_csr_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_csr_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
index 4975b276..25715903 100644
--- a/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
+++ b/openacc/impl/psb_z_oacc_csr_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_z_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_z_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_z_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
index 6dae625a..d2b9ee48 100644
--- a/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_csr_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_csr_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
index 1d7dd723..af7fb1da 100644
--- a/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_csr_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_csr_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90
index bdfb88d6..4cfea07c 100644
--- a/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90
+++ b/openacc/impl/psb_z_oacc_csr_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_z_csr_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90
index 48a5e202..aa8a03cd 100644
--- a/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90
+++ b/openacc/impl/psb_z_oacc_ell_allocate_mnnz.F90
@@ -21,20 +21,8 @@ contains
end if
call a%psb_z_ell_sparse_mat%allocate(m, n, nz_)
-
- if (.not.allocated(a%val)) then
- allocate(a%val(m, nz_))
- allocate(a%ja(m, nz_))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- end if
-
- a%val = zzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
index 6dd60bd7..a3129ff6 100644
--- a/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_ell_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_ell_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
index 60d94bb2..f9a52d62 100644
--- a/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_ell_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_ell_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90
index f445a6b4..1cbe249c 100644
--- a/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90
+++ b/openacc/impl/psb_z_oacc_ell_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_z_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_z_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_z_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
index db70b944..9abb1a74 100644
--- a/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_ell_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_ell_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
index f99b3817..eb49b21a 100644
--- a/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_ell_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_ell_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90
index 8fd3ad77..c7c88c65 100644
--- a/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90
+++ b/openacc/impl/psb_z_oacc_ell_reallocate_nz.F90
@@ -14,8 +14,8 @@ contains
info = psb_success_
call a%psb_z_ell_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90
index c398d259..fb84dfb7 100644
--- a/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90
+++ b/openacc/impl/psb_z_oacc_hll_allocate_mnnz.F90
@@ -22,25 +22,8 @@ contains
end if
call a%psb_z_hll_sparse_mat%allocate(m, n, nz_)
-
- hksz = a%hksz
- nhacks = (m + hksz - 1) / hksz
-
- if (.not.allocated(a%val)) then
- allocate(a%val(nz_ * m))
- allocate(a%ja(nz_ * m))
- allocate(a%irn(m))
- allocate(a%idiag(m))
- allocate(a%hkoffs(nhacks))
- end if
-
- a%val = zzero
- a%ja = -1
- a%irn = 0
- a%idiag = 0
- a%hkoffs = 0
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
index e018e762..9fb903ca 100644
--- a/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_hll_cp_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_hll_sparse_mat%cp_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
index 7b18b255..3a77865d 100644
--- a/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_hll_cp_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_z_coo_sparse_mat)
call a%cp_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_hll_sparse_mat%cp_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90
index 1d068542..a5bd54ad 100644
--- a/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90
+++ b/openacc/impl/psb_z_oacc_hll_inner_vect_sv.F90
@@ -40,9 +40,9 @@ contains
call a%psb_z_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
call y%set_host()
else
- select type (xx => x)
+ select type (xx => x)
type is (psb_z_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
type is (psb_z_vect_oacc)
if (xx%is_host()) call xx%sync()
if (beta /= dzero) then
diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
index 267f13f3..46d4d6bd 100644
--- a/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
+++ b/openacc/impl/psb_z_oacc_hll_mv_from_coo.F90
@@ -10,10 +10,10 @@ contains
info = psb_success_
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_hll_sparse_mat%mv_from_coo(b, info)
if (info /= 0) goto 9999
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
diff --git a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
index 151dc6ce..45e9846a 100644
--- a/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
+++ b/openacc/impl/psb_z_oacc_hll_mv_from_fmt.F90
@@ -14,10 +14,10 @@ contains
type is (psb_z_coo_sparse_mat)
call a%mv_from_coo(b, info)
class default
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_hll_sparse_mat%mv_from_fmt(b, info)
if (info /= 0) return
- call a%sync_space()
+ call a%sync_dev_space()
call a%set_host()
call a%sync()
end select
diff --git a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90
index 5b49efe5..7d9589cb 100644
--- a/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90
+++ b/openacc/impl/psb_z_oacc_hll_reallocate_nz.F90
@@ -15,8 +15,8 @@ contains
info = psb_success_
call a%psb_z_hll_sparse_mat%reallocate(nz)
-
- call a%set_dev()
+ call a%sync_dev_space()
+ call a%set_host()
if (info /= 0) goto 9999
call psb_erractionrestore(err_act)
diff --git a/openacc/impl/psb_z_oacc_mlt_v.f90 b/openacc/impl/psb_z_oacc_mlt_v.f90
index 4bc582d2..3e8f6030 100644
--- a/openacc/impl/psb_z_oacc_mlt_v.f90
+++ b/openacc/impl/psb_z_oacc_mlt_v.f90
@@ -1,6 +1,6 @@
subroutine psb_z_oacc_mlt_v(x, y, info)
- use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v
+ use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v
implicit none
class(psb_z_base_vect_type), intent(inout) :: x
@@ -13,7 +13,7 @@ subroutine psb_z_oacc_mlt_v(x, y, info)
n = min(x%get_nrows(), y%get_nrows())
info = 0
n = min(x%get_nrows(), y%get_nrows())
- select type(xx => x)
+ select type(xx => x)
class is (psb_z_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90
index 337a0a96..f69d863c 100644
--- a/openacc/impl/psb_z_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90
@@ -1,5 +1,5 @@
subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
- use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2
+ use psb_z_oacc_vect_mod, psb_protect_name => psb_z_oacc_mlt_v_2
use psb_string_mod
implicit none
complex(psb_dpk_), intent(in) :: alpha, beta
@@ -18,9 +18,9 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
- select type(xx => x)
+ select type(xx => x)
class is (psb_z_vect_oacc)
- select type (yy => y)
+ select type (yy => y)
class is (psb_z_vect_oacc)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90
index 07734762..a1f6e2c3 100644
--- a/openacc/psb_c_oacc_csr_mat_mod.F90
+++ b/openacc/psb_c_oacc_csr_mat_mod.F90
@@ -13,32 +13,32 @@ module psb_c_oacc_csr_mat_mod
type, extends(psb_c_csr_sparse_mat) :: psb_c_oacc_csr_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => c_oacc_csr_get_fmt
- procedure, pass(a) :: sizeof => c_oacc_csr_sizeof
- procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv
- procedure, pass(a) :: scals => psb_c_oacc_csr_scals
- procedure, pass(a) :: scalv => psb_c_oacc_csr_scal
- procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt
- procedure, pass(a) :: free => c_oacc_csr_free
- procedure, pass(a) :: mold => psb_c_oacc_csr_mold
- procedure, pass(a) :: all => c_oacc_csr_all
- procedure, pass(a) :: is_host => c_oacc_csr_is_host
- procedure, pass(a) :: is_sync => c_oacc_csr_is_sync
- procedure, pass(a) :: is_dev => c_oacc_csr_is_dev
- procedure, pass(a) :: set_host => c_oacc_csr_set_host
- procedure, pass(a) :: set_sync => c_oacc_csr_set_sync
- procedure, pass(a) :: set_dev => c_oacc_csr_set_dev
- procedure, pass(a) :: free_space => c_oacc_csr_free_space
- procedure, pass(a) :: sync_space => c_oacc_csr_sync_space
- procedure, pass(a) :: sync => c_oacc_csr_sync
+ procedure, nopass :: get_fmt => c_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => c_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_c_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_c_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_c_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_c_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_c_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_c_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_c_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => c_oacc_csr_free
+ procedure, pass(a) :: mold => psb_c_oacc_csr_mold
+ procedure, pass(a) :: all => c_oacc_csr_all
+ procedure, pass(a) :: is_host => c_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => c_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => c_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => c_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => c_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => c_oacc_csr_set_dev
+ procedure, pass(a) :: free_dev_space => c_oacc_csr_free_dev_space
+ procedure, pass(a) :: sync_dev_space => c_oacc_csr_sync_dev_space
+ procedure, pass(a) :: sync => c_oacc_csr_sync
end type psb_c_oacc_csr_sparse_mat
interface
@@ -156,7 +156,7 @@ module psb_c_oacc_csr_mat_mod
contains
- subroutine c_oacc_csr_free_space(a)
+ subroutine c_oacc_csr_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
- end subroutine c_oacc_csr_free_space
+ end subroutine c_oacc_csr_free_dev_space
subroutine c_oacc_csr_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_csr_sparse_mat%free()
return
@@ -218,7 +218,7 @@ contains
allocate(a%ja(nz),stat=info)
allocate(a%irp(m+1),stat=info)
if (info == 0) call a%set_host()
- if (info == 0) call a%sync_space()
+ if (info == 0) call a%sync_dev_space()
end subroutine c_oacc_csr_all
function c_oacc_csr_is_host(a) result(res)
@@ -266,13 +266,13 @@ contains
a%devstate = is_dev
end subroutine c_oacc_csr_set_dev
- subroutine c_oacc_csr_sync_space(a)
+ subroutine c_oacc_csr_sync_dev_space(a)
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
- end subroutine c_oacc_csr_sync_space
+ end subroutine c_oacc_csr_sync_dev_space
subroutine c_oacc_csr_sync(a)
implicit none
@@ -280,7 +280,7 @@ contains
class(psb_c_oacc_csr_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
index d23d4d0a..996d4628 100644
--- a/openacc/psb_c_oacc_ell_mat_mod.F90
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_c_oacc_ell_mat_mod
type, extends(psb_c_ell_sparse_mat) :: psb_c_oacc_ell_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => c_oacc_ell_get_fmt
- procedure, pass(a) :: sizeof => c_oacc_ell_sizeof
- procedure, pass(a) :: is_host => c_oacc_ell_is_host
- procedure, pass(a) :: is_sync => c_oacc_ell_is_sync
- procedure, pass(a) :: is_dev => c_oacc_ell_is_dev
- procedure, pass(a) :: set_host => c_oacc_ell_set_host
- procedure, pass(a) :: set_sync => c_oacc_ell_set_sync
- procedure, pass(a) :: set_dev => c_oacc_ell_set_dev
- procedure, pass(a) :: sync_space => c_oacc_ell_sync_space
- procedure, pass(a) :: sync => c_oacc_ell_sync
- procedure, pass(a) :: free_space => c_oacc_ell_free_space
- procedure, pass(a) :: free => c_oacc_ell_free
- procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv
- procedure, pass(a) :: scals => psb_c_oacc_ell_scals
- procedure, pass(a) :: scalv => psb_c_oacc_ell_scal
- procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt
- procedure, pass(a) :: mold => psb_c_oacc_ell_mold
+ procedure, nopass :: get_fmt => c_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => c_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => c_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => c_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => c_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => c_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => c_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => c_oacc_ell_set_dev
+ procedure, pass(a) :: sync_dev_space => c_oacc_ell_sync_dev_space
+ procedure, pass(a) :: sync => c_oacc_ell_sync
+ procedure, pass(a) :: free_dev_space => c_oacc_ell_free_dev_space
+ procedure, pass(a) :: free => c_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_c_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_c_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_c_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_c_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_c_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_c_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_c_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_c_oacc_ell_mold
end type psb_c_oacc_ell_sparse_mat
@@ -154,7 +154,7 @@ module psb_c_oacc_ell_mat_mod
contains
- subroutine c_oacc_ell_free_space(a)
+ subroutine c_oacc_ell_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
@@ -166,7 +166,7 @@ contains
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
- end subroutine c_oacc_ell_free_space
+ end subroutine c_oacc_ell_free_dev_space
subroutine c_oacc_ell_free(a)
use psb_base_mod
@@ -174,7 +174,7 @@ contains
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_ell_sparse_mat%free()
return
@@ -195,7 +195,7 @@ contains
end function c_oacc_ell_sizeof
- subroutine c_oacc_ell_sync_space(a)
+ subroutine c_oacc_ell_sync_dev_space(a)
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
@@ -203,7 +203,7 @@ contains
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
- end subroutine c_oacc_ell_sync_space
+ end subroutine c_oacc_ell_sync_dev_space
function c_oacc_ell_is_host(a) result(res)
implicit none
@@ -262,7 +262,7 @@ contains
class(psb_c_oacc_ell_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90
index 2d2095bf..ebcbf29e 100644
--- a/openacc/psb_c_oacc_hll_mat_mod.F90
+++ b/openacc/psb_c_oacc_hll_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_c_oacc_hll_mat_mod
type, extends(psb_c_hll_sparse_mat) :: psb_c_oacc_hll_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => c_oacc_hll_get_fmt
- procedure, pass(a) :: sizeof => c_oacc_hll_sizeof
- procedure, pass(a) :: is_host => c_oacc_hll_is_host
- procedure, pass(a) :: is_sync => c_oacc_hll_is_sync
- procedure, pass(a) :: is_dev => c_oacc_hll_is_dev
- procedure, pass(a) :: set_host => c_oacc_hll_set_host
- procedure, pass(a) :: set_sync => c_oacc_hll_set_sync
- procedure, pass(a) :: set_dev => c_oacc_hll_set_dev
- procedure, pass(a) :: sync_space => c_oacc_hll_sync_space
- procedure, pass(a) :: sync => c_oacc_hll_sync
- procedure, pass(a) :: free_space => c_oacc_hll_free_space
- procedure, pass(a) :: free => c_oacc_hll_free
- procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv
- procedure, pass(a) :: scals => psb_c_oacc_hll_scals
- procedure, pass(a) :: scalv => psb_c_oacc_hll_scal
- procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt
- procedure, pass(a) :: mold => psb_c_oacc_hll_mold
+ procedure, nopass :: get_fmt => c_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => c_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => c_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => c_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => c_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => c_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => c_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => c_oacc_hll_set_dev
+ procedure, pass(a) :: sync_dev_space => c_oacc_hll_sync_dev_space
+ procedure, pass(a) :: sync => c_oacc_hll_sync
+ procedure, pass(a) :: free_dev_space => c_oacc_hll_free_dev_space
+ procedure, pass(a) :: free => c_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_c_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_c_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_c_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_c_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_c_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_c_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_c_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_c_oacc_hll_mold
end type psb_c_oacc_hll_sparse_mat
@@ -154,7 +154,7 @@ module psb_c_oacc_hll_mat_mod
contains
- subroutine c_oacc_hll_free_space(a)
+ subroutine c_oacc_hll_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
- end subroutine c_oacc_hll_free_space
+ end subroutine c_oacc_hll_free_dev_space
subroutine c_oacc_hll_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_c_hll_sparse_mat%free()
return
@@ -249,7 +249,7 @@ contains
res = 'HLLOA'
end function c_oacc_hll_get_fmt
- subroutine c_oacc_hll_sync_space(a)
+ subroutine c_oacc_hll_sync_dev_space(a)
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
@@ -258,7 +258,7 @@ contains
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
- end subroutine c_oacc_hll_sync_space
+ end subroutine c_oacc_hll_sync_dev_space
subroutine c_oacc_hll_sync(a)
@@ -267,7 +267,7 @@ contains
class(psb_c_oacc_hll_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 79ff0ca3..95c45646 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -15,50 +15,50 @@ module psb_c_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => c_oacc_get_nrows
- procedure, nopass :: get_fmt => c_oacc_get_fmt
-
- procedure, pass(x) :: all => c_oacc_vect_all
- procedure, pass(x) :: zero => c_oacc_zero
- procedure, pass(x) :: asb_m => c_oacc_asb_m
- procedure, pass(x) :: sync => c_oacc_sync
- procedure, pass(x) :: sync_space => c_oacc_sync_space
- procedure, pass(x) :: bld_x => c_oacc_bld_x
- procedure, pass(x) :: bld_mn => c_oacc_bld_mn
- procedure, pass(x) :: free => c_oacc_vect_free
- procedure, pass(x) :: ins_a => c_oacc_ins_a
- procedure, pass(x) :: ins_v => c_oacc_ins_v
- procedure, pass(x) :: is_host => c_oacc_is_host
- procedure, pass(x) :: is_dev => c_oacc_is_dev
- procedure, pass(x) :: is_sync => c_oacc_is_sync
- procedure, pass(x) :: set_host => c_oacc_set_host
- procedure, pass(x) :: set_dev => c_oacc_set_dev
- procedure, pass(x) :: set_sync => c_oacc_set_sync
- procedure, pass(x) :: set_scal => c_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf
- procedure, pass(y) :: sctb => c_oacc_sctb
- procedure, pass(y) :: sctb_x => c_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => c_oacc_get_size
-
- procedure, pass(x) :: dot_v => c_oacc_vect_dot
- procedure, pass(x) :: dot_a => c_oacc_dot_a
- procedure, pass(y) :: axpby_v => c_oacc_axpby_v
- procedure, pass(y) :: axpby_a => c_oacc_axpby_a
- procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz
- procedure, pass(y) :: mlt_a => c_oacc_mlt_a
- procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2
- procedure, pass(x) :: scal => c_oacc_scal
- procedure, pass(x) :: nrm2 => c_oacc_nrm2
- procedure, pass(x) :: amax => c_oacc_amax
- procedure, pass(x) :: asum => c_oacc_asum
- procedure, pass(x) :: absval1 => c_oacc_absval1
- procedure, pass(x) :: absval2 => c_oacc_absval2
+ procedure, pass(x) :: get_nrows => c_oacc_get_nrows
+ procedure, nopass :: get_fmt => c_oacc_get_fmt
+
+ procedure, pass(x) :: all => c_oacc_vect_all
+ procedure, pass(x) :: zero => c_oacc_zero
+ procedure, pass(x) :: asb_m => c_oacc_asb_m
+ procedure, pass(x) :: sync => c_oacc_sync
+ procedure, pass(x) :: sync_dev_space => c_oacc_sync_dev_space
+ procedure, pass(x) :: bld_x => c_oacc_bld_x
+ procedure, pass(x) :: bld_mn => c_oacc_bld_mn
+ procedure, pass(x) :: free => c_oacc_vect_free
+ procedure, pass(x) :: ins_a => c_oacc_ins_a
+ procedure, pass(x) :: ins_v => c_oacc_ins_v
+ procedure, pass(x) :: is_host => c_oacc_is_host
+ procedure, pass(x) :: is_dev => c_oacc_is_dev
+ procedure, pass(x) :: is_sync => c_oacc_is_sync
+ procedure, pass(x) :: set_host => c_oacc_set_host
+ procedure, pass(x) :: set_dev => c_oacc_set_dev
+ procedure, pass(x) :: set_sync => c_oacc_set_sync
+ procedure, pass(x) :: set_scal => c_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf
+ procedure, pass(y) :: sctb => c_oacc_sctb
+ procedure, pass(y) :: sctb_x => c_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => c_oacc_get_size
+
+ procedure, pass(x) :: dot_v => c_oacc_vect_dot
+ procedure, pass(x) :: dot_a => c_oacc_dot_a
+ procedure, pass(y) :: axpby_v => c_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => c_oacc_axpby_a
+ procedure, pass(z) :: upd_xyz => c_oacc_upd_xyz
+ procedure, pass(y) :: mlt_a => c_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => c_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => psb_c_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_c_oacc_mlt_v_2
+ procedure, pass(x) :: scal => c_oacc_scal
+ procedure, pass(x) :: nrm2 => c_oacc_nrm2
+ procedure, pass(x) :: amax => c_oacc_amax
+ procedure, pass(x) :: asum => c_oacc_asum
+ procedure, pass(x) :: absval1 => c_oacc_absval1
+ procedure, pass(x) :: absval2 => c_oacc_absval2
final :: c_oacc_final_vect_free
end type psb_c_vect_oacc
@@ -117,7 +117,7 @@ contains
integer(psb_ipk_) :: i
n = min(size(x%v), size(y%v))
- select type (yy => y)
+ select type (yy => y)
class is (psb_c_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -287,7 +287,7 @@ contains
info = psb_success_
- select type(xx => x)
+ select type(xx => x)
type is (psb_c_vect_oacc)
if ((beta /= czero) .and. y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
@@ -354,11 +354,11 @@ contains
info = psb_success_
gpu_done = .false.
- select type(xx => x)
+ select type(xx => x)
class is (psb_c_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
class is (psb_c_vect_oacc)
- select type(zz => z)
+ select type(zz => z)
class is (psb_c_vect_oacc)
if ((beta /= czero) .and. yy%is_host()) call yy%sync()
if ((delta /= czero) .and. zz%is_host()) call zz%sync()
@@ -405,7 +405,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
@@ -432,7 +432,7 @@ contains
class(psb_c_vect_oacc) :: y
integer(psb_ipk_) :: info, ni
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -485,7 +485,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -512,7 +512,7 @@ contains
info = 0
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -544,9 +544,9 @@ contains
if (psb_errstatus_fatal()) return
done_oacc = .false.
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
- select type(vval => val)
+ select type(vval => val)
type is (psb_c_vect_oacc)
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
@@ -561,11 +561,11 @@ contains
end select
if (.not.done_oacc) then
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync()
end select
- select type(vval => val)
+ select type(vval => val)
type is (psb_c_vect_oacc)
if (vval%is_dev()) call vval%sync()
end select
@@ -611,7 +611,7 @@ contains
call psb_errpush(info, 'c_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine c_oacc_bld_mn
@@ -633,7 +633,7 @@ contains
end if
x%v(:) = this(:)
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine c_oacc_bld_x
@@ -719,7 +719,7 @@ contains
res = czero
!write(0,*) 'dot_v'
- select type(yy => y)
+ select type(yy => y)
type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
@@ -762,11 +762,11 @@ contains
end function c_oacc_dot_a
- subroutine c_oacc_sync_space(x)
+ subroutine c_oacc_sync_dev_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
- end subroutine c_oacc_sync_space
+ end subroutine c_oacc_sync_dev_space
subroutine c_oacc_sync(x)
implicit none
@@ -840,7 +840,7 @@ contains
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine c_oacc_vect_all
subroutine c_oacc_final_vect_free(x)
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index 74031e89..a9d72eae 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -13,32 +13,32 @@ module psb_d_oacc_csr_mat_mod
type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => d_oacc_csr_get_fmt
- procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
- procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv
- procedure, pass(a) :: scals => psb_d_oacc_csr_scals
- procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
- procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt
- procedure, pass(a) :: free => d_oacc_csr_free
- procedure, pass(a) :: mold => psb_d_oacc_csr_mold
- procedure, pass(a) :: all => d_oacc_csr_all
- procedure, pass(a) :: is_host => d_oacc_csr_is_host
- procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
- procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
- procedure, pass(a) :: set_host => d_oacc_csr_set_host
- procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
- procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
- procedure, pass(a) :: free_space => d_oacc_csr_free_space
- procedure, pass(a) :: sync_space => d_oacc_csr_sync_space
- procedure, pass(a) :: sync => d_oacc_csr_sync
+ procedure, nopass :: get_fmt => d_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => d_oacc_csr_free
+ procedure, pass(a) :: mold => psb_d_oacc_csr_mold
+ procedure, pass(a) :: all => d_oacc_csr_all
+ procedure, pass(a) :: is_host => d_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => d_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => d_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => d_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_csr_set_dev
+ procedure, pass(a) :: free_dev_space => d_oacc_csr_free_dev_space
+ procedure, pass(a) :: sync_dev_space => d_oacc_csr_sync_dev_space
+ procedure, pass(a) :: sync => d_oacc_csr_sync
end type psb_d_oacc_csr_sparse_mat
interface
@@ -156,7 +156,7 @@ module psb_d_oacc_csr_mat_mod
contains
- subroutine d_oacc_csr_free_space(a)
+ subroutine d_oacc_csr_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
- end subroutine d_oacc_csr_free_space
+ end subroutine d_oacc_csr_free_dev_space
subroutine d_oacc_csr_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_csr_sparse_mat%free()
return
@@ -218,7 +218,7 @@ contains
allocate(a%ja(nz),stat=info)
allocate(a%irp(m+1),stat=info)
if (info == 0) call a%set_host()
- if (info == 0) call a%sync_space()
+ if (info == 0) call a%sync_dev_space()
end subroutine d_oacc_csr_all
function d_oacc_csr_is_host(a) result(res)
@@ -266,13 +266,13 @@ contains
a%devstate = is_dev
end subroutine d_oacc_csr_set_dev
- subroutine d_oacc_csr_sync_space(a)
+ subroutine d_oacc_csr_sync_dev_space(a)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
- end subroutine d_oacc_csr_sync_space
+ end subroutine d_oacc_csr_sync_dev_space
subroutine d_oacc_csr_sync(a)
implicit none
@@ -280,7 +280,7 @@ contains
class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 45ffc92d..1409a1d6 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_d_oacc_ell_mat_mod
type, extends(psb_d_ell_sparse_mat) :: psb_d_oacc_ell_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => d_oacc_ell_get_fmt
- procedure, pass(a) :: sizeof => d_oacc_ell_sizeof
- procedure, pass(a) :: is_host => d_oacc_ell_is_host
- procedure, pass(a) :: is_sync => d_oacc_ell_is_sync
- procedure, pass(a) :: is_dev => d_oacc_ell_is_dev
- procedure, pass(a) :: set_host => d_oacc_ell_set_host
- procedure, pass(a) :: set_sync => d_oacc_ell_set_sync
- procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
- procedure, pass(a) :: sync_space => d_oacc_ell_sync_space
- procedure, pass(a) :: sync => d_oacc_ell_sync
- procedure, pass(a) :: free_space => d_oacc_ell_free_space
- procedure, pass(a) :: free => d_oacc_ell_free
- procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv
- procedure, pass(a) :: scals => psb_d_oacc_ell_scals
- procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
- procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt
- procedure, pass(a) :: mold => psb_d_oacc_ell_mold
+ procedure, nopass :: get_fmt => d_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => d_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => d_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => d_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => d_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_ell_set_dev
+ procedure, pass(a) :: sync_dev_space => d_oacc_ell_sync_dev_space
+ procedure, pass(a) :: sync => d_oacc_ell_sync
+ procedure, pass(a) :: free_dev_space => d_oacc_ell_free_dev_space
+ procedure, pass(a) :: free => d_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_d_oacc_ell_mold
end type psb_d_oacc_ell_sparse_mat
@@ -154,7 +154,7 @@ module psb_d_oacc_ell_mat_mod
contains
- subroutine d_oacc_ell_free_space(a)
+ subroutine d_oacc_ell_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
@@ -166,7 +166,7 @@ contains
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
- end subroutine d_oacc_ell_free_space
+ end subroutine d_oacc_ell_free_dev_space
subroutine d_oacc_ell_free(a)
use psb_base_mod
@@ -174,7 +174,7 @@ contains
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_ell_sparse_mat%free()
return
@@ -195,7 +195,7 @@ contains
end function d_oacc_ell_sizeof
- subroutine d_oacc_ell_sync_space(a)
+ subroutine d_oacc_ell_sync_dev_space(a)
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
@@ -203,7 +203,7 @@ contains
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
- end subroutine d_oacc_ell_sync_space
+ end subroutine d_oacc_ell_sync_dev_space
function d_oacc_ell_is_host(a) result(res)
implicit none
@@ -262,7 +262,7 @@ contains
class(psb_d_oacc_ell_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
index 8009f085..15ae055e 100644
--- a/openacc/psb_d_oacc_hll_mat_mod.F90
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_d_oacc_hll_mat_mod
type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => d_oacc_hll_get_fmt
- procedure, pass(a) :: sizeof => d_oacc_hll_sizeof
- procedure, pass(a) :: is_host => d_oacc_hll_is_host
- procedure, pass(a) :: is_sync => d_oacc_hll_is_sync
- procedure, pass(a) :: is_dev => d_oacc_hll_is_dev
- procedure, pass(a) :: set_host => d_oacc_hll_set_host
- procedure, pass(a) :: set_sync => d_oacc_hll_set_sync
- procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
- procedure, pass(a) :: sync_space => d_oacc_hll_sync_space
- procedure, pass(a) :: sync => d_oacc_hll_sync
- procedure, pass(a) :: free_space => d_oacc_hll_free_space
- procedure, pass(a) :: free => d_oacc_hll_free
- procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv
- procedure, pass(a) :: scals => psb_d_oacc_hll_scals
- procedure, pass(a) :: scalv => psb_d_oacc_hll_scal
- procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt
- procedure, pass(a) :: mold => psb_d_oacc_hll_mold
+ procedure, nopass :: get_fmt => d_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => d_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => d_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => d_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => d_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => d_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => d_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => d_oacc_hll_set_dev
+ procedure, pass(a) :: sync_dev_space => d_oacc_hll_sync_dev_space
+ procedure, pass(a) :: sync => d_oacc_hll_sync
+ procedure, pass(a) :: free_dev_space => d_oacc_hll_free_dev_space
+ procedure, pass(a) :: free => d_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_d_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_d_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_d_oacc_hll_mold
end type psb_d_oacc_hll_sparse_mat
@@ -154,7 +154,7 @@ module psb_d_oacc_hll_mat_mod
contains
- subroutine d_oacc_hll_free_space(a)
+ subroutine d_oacc_hll_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
- end subroutine d_oacc_hll_free_space
+ end subroutine d_oacc_hll_free_dev_space
subroutine d_oacc_hll_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_d_hll_sparse_mat%free()
return
@@ -249,7 +249,7 @@ contains
res = 'HLLOA'
end function d_oacc_hll_get_fmt
- subroutine d_oacc_hll_sync_space(a)
+ subroutine d_oacc_hll_sync_dev_space(a)
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
@@ -258,7 +258,7 @@ contains
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
- end subroutine d_oacc_hll_sync_space
+ end subroutine d_oacc_hll_sync_dev_space
subroutine d_oacc_hll_sync(a)
@@ -267,7 +267,7 @@ contains
class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index c7804bc1..3d71e54c 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -15,50 +15,50 @@ module psb_d_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => d_oacc_get_nrows
- procedure, nopass :: get_fmt => d_oacc_get_fmt
-
- procedure, pass(x) :: all => d_oacc_vect_all
- procedure, pass(x) :: zero => d_oacc_zero
- procedure, pass(x) :: asb_m => d_oacc_asb_m
- procedure, pass(x) :: sync => d_oacc_sync
- procedure, pass(x) :: sync_space => d_oacc_sync_space
- procedure, pass(x) :: bld_x => d_oacc_bld_x
- procedure, pass(x) :: bld_mn => d_oacc_bld_mn
- procedure, pass(x) :: free => d_oacc_vect_free
- procedure, pass(x) :: ins_a => d_oacc_ins_a
- procedure, pass(x) :: ins_v => d_oacc_ins_v
- procedure, pass(x) :: is_host => d_oacc_is_host
- procedure, pass(x) :: is_dev => d_oacc_is_dev
- procedure, pass(x) :: is_sync => d_oacc_is_sync
- procedure, pass(x) :: set_host => d_oacc_set_host
- procedure, pass(x) :: set_dev => d_oacc_set_dev
- procedure, pass(x) :: set_sync => d_oacc_set_sync
- procedure, pass(x) :: set_scal => d_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
- procedure, pass(y) :: sctb => d_oacc_sctb
- procedure, pass(y) :: sctb_x => d_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => d_oacc_get_size
-
- procedure, pass(x) :: dot_v => d_oacc_vect_dot
- procedure, pass(x) :: dot_a => d_oacc_dot_a
- procedure, pass(y) :: axpby_v => d_oacc_axpby_v
- procedure, pass(y) :: axpby_a => d_oacc_axpby_a
- procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz
- procedure, pass(y) :: mlt_a => d_oacc_mlt_a
- procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2
- procedure, pass(x) :: scal => d_oacc_scal
- procedure, pass(x) :: nrm2 => d_oacc_nrm2
- procedure, pass(x) :: amax => d_oacc_amax
- procedure, pass(x) :: asum => d_oacc_asum
- procedure, pass(x) :: absval1 => d_oacc_absval1
- procedure, pass(x) :: absval2 => d_oacc_absval2
+ procedure, pass(x) :: get_nrows => d_oacc_get_nrows
+ procedure, nopass :: get_fmt => d_oacc_get_fmt
+
+ procedure, pass(x) :: all => d_oacc_vect_all
+ procedure, pass(x) :: zero => d_oacc_zero
+ procedure, pass(x) :: asb_m => d_oacc_asb_m
+ procedure, pass(x) :: sync => d_oacc_sync
+ procedure, pass(x) :: sync_dev_space => d_oacc_sync_dev_space
+ procedure, pass(x) :: bld_x => d_oacc_bld_x
+ procedure, pass(x) :: bld_mn => d_oacc_bld_mn
+ procedure, pass(x) :: free => d_oacc_vect_free
+ procedure, pass(x) :: ins_a => d_oacc_ins_a
+ procedure, pass(x) :: ins_v => d_oacc_ins_v
+ procedure, pass(x) :: is_host => d_oacc_is_host
+ procedure, pass(x) :: is_dev => d_oacc_is_dev
+ procedure, pass(x) :: is_sync => d_oacc_is_sync
+ procedure, pass(x) :: set_host => d_oacc_set_host
+ procedure, pass(x) :: set_dev => d_oacc_set_dev
+ procedure, pass(x) :: set_sync => d_oacc_set_sync
+ procedure, pass(x) :: set_scal => d_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
+ procedure, pass(y) :: sctb => d_oacc_sctb
+ procedure, pass(y) :: sctb_x => d_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => d_oacc_get_size
+
+ procedure, pass(x) :: dot_v => d_oacc_vect_dot
+ procedure, pass(x) :: dot_a => d_oacc_dot_a
+ procedure, pass(y) :: axpby_v => d_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => d_oacc_axpby_a
+ procedure, pass(z) :: upd_xyz => d_oacc_upd_xyz
+ procedure, pass(y) :: mlt_a => d_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2
+ procedure, pass(x) :: scal => d_oacc_scal
+ procedure, pass(x) :: nrm2 => d_oacc_nrm2
+ procedure, pass(x) :: amax => d_oacc_amax
+ procedure, pass(x) :: asum => d_oacc_asum
+ procedure, pass(x) :: absval1 => d_oacc_absval1
+ procedure, pass(x) :: absval2 => d_oacc_absval2
final :: d_oacc_final_vect_free
end type psb_d_vect_oacc
@@ -117,7 +117,7 @@ contains
integer(psb_ipk_) :: i
n = min(size(x%v), size(y%v))
- select type (yy => y)
+ select type (yy => y)
class is (psb_d_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -287,7 +287,7 @@ contains
info = psb_success_
- select type(xx => x)
+ select type(xx => x)
type is (psb_d_vect_oacc)
if ((beta /= dzero) .and. y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
@@ -354,11 +354,11 @@ contains
info = psb_success_
gpu_done = .false.
- select type(xx => x)
+ select type(xx => x)
class is (psb_d_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
class is (psb_d_vect_oacc)
- select type(zz => z)
+ select type(zz => z)
class is (psb_d_vect_oacc)
if ((beta /= dzero) .and. yy%is_host()) call yy%sync()
if ((delta /= dzero) .and. zz%is_host()) call zz%sync()
@@ -405,7 +405,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
@@ -432,7 +432,7 @@ contains
class(psb_d_vect_oacc) :: y
integer(psb_ipk_) :: info, ni
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -485,7 +485,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -512,7 +512,7 @@ contains
info = 0
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -544,9 +544,9 @@ contains
if (psb_errstatus_fatal()) return
done_oacc = .false.
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
- select type(vval => val)
+ select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
@@ -561,11 +561,11 @@ contains
end select
if (.not.done_oacc) then
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync()
end select
- select type(vval => val)
+ select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_dev()) call vval%sync()
end select
@@ -611,7 +611,7 @@ contains
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine d_oacc_bld_mn
@@ -633,7 +633,7 @@ contains
end if
x%v(:) = this(:)
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine d_oacc_bld_x
@@ -719,7 +719,7 @@ contains
res = dzero
!write(0,*) 'dot_v'
- select type(yy => y)
+ select type(yy => y)
type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
@@ -762,11 +762,11 @@ contains
end function d_oacc_dot_a
- subroutine d_oacc_sync_space(x)
+ subroutine d_oacc_sync_dev_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
- end subroutine d_oacc_sync_space
+ end subroutine d_oacc_sync_dev_space
subroutine d_oacc_sync(x)
implicit none
@@ -840,7 +840,7 @@ contains
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine d_oacc_vect_all
subroutine d_oacc_final_vect_free(x)
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 8c813134..42226f0c 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -13,34 +13,34 @@ module psb_i_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => i_oacc_get_nrows
- procedure, nopass :: get_fmt => i_oacc_get_fmt
-
- procedure, pass(x) :: all => i_oacc_vect_all
- procedure, pass(x) :: zero => i_oacc_zero
- procedure, pass(x) :: asb_m => i_oacc_asb_m
- procedure, pass(x) :: sync => i_oacc_sync
- procedure, pass(x) :: sync_space => i_oacc_sync_space
- procedure, pass(x) :: bld_x => i_oacc_bld_x
- procedure, pass(x) :: bld_mn => i_oacc_bld_mn
- procedure, pass(x) :: free => i_oacc_vect_free
- procedure, pass(x) :: ins_a => i_oacc_ins_a
- procedure, pass(x) :: ins_v => i_oacc_ins_v
- procedure, pass(x) :: is_host => i_oacc_is_host
- procedure, pass(x) :: is_dev => i_oacc_is_dev
- procedure, pass(x) :: is_sync => i_oacc_is_sync
- procedure, pass(x) :: set_host => i_oacc_set_host
- procedure, pass(x) :: set_dev => i_oacc_set_dev
- procedure, pass(x) :: set_sync => i_oacc_set_sync
- procedure, pass(x) :: set_scal => i_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf
- procedure, pass(y) :: sctb => i_oacc_sctb
- procedure, pass(y) :: sctb_x => i_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => i_oacc_get_size
+ procedure, pass(x) :: get_nrows => i_oacc_get_nrows
+ procedure, nopass :: get_fmt => i_oacc_get_fmt
+
+ procedure, pass(x) :: all => i_oacc_vect_all
+ procedure, pass(x) :: zero => i_oacc_zero
+ procedure, pass(x) :: asb_m => i_oacc_asb_m
+ procedure, pass(x) :: sync => i_oacc_sync
+ procedure, pass(x) :: sync_dev_space => i_oacc_sync_dev_space
+ procedure, pass(x) :: bld_x => i_oacc_bld_x
+ procedure, pass(x) :: bld_mn => i_oacc_bld_mn
+ procedure, pass(x) :: free => i_oacc_vect_free
+ procedure, pass(x) :: ins_a => i_oacc_ins_a
+ procedure, pass(x) :: ins_v => i_oacc_ins_v
+ procedure, pass(x) :: is_host => i_oacc_is_host
+ procedure, pass(x) :: is_dev => i_oacc_is_dev
+ procedure, pass(x) :: is_sync => i_oacc_is_sync
+ procedure, pass(x) :: set_host => i_oacc_set_host
+ procedure, pass(x) :: set_dev => i_oacc_set_dev
+ procedure, pass(x) :: set_sync => i_oacc_set_sync
+ procedure, pass(x) :: set_scal => i_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf
+ procedure, pass(y) :: sctb => i_oacc_sctb
+ procedure, pass(y) :: sctb_x => i_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => i_oacc_get_size
final :: i_oacc_final_vect_free
end type psb_i_vect_oacc
@@ -63,7 +63,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
@@ -90,7 +90,7 @@ contains
class(psb_i_vect_oacc) :: y
integer(psb_ipk_) :: info, ni
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -143,7 +143,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -170,7 +170,7 @@ contains
info = 0
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -202,9 +202,9 @@ contains
if (psb_errstatus_fatal()) return
done_oacc = .false.
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
- select type(vval => val)
+ select type(vval => val)
type is (psb_i_vect_oacc)
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
@@ -219,11 +219,11 @@ contains
end select
if (.not.done_oacc) then
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync()
end select
- select type(vval => val)
+ select type(vval => val)
type is (psb_i_vect_oacc)
if (vval%is_dev()) call vval%sync()
end select
@@ -269,7 +269,7 @@ contains
call psb_errpush(info, 'i_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine i_oacc_bld_mn
@@ -291,7 +291,7 @@ contains
end if
x%v(:) = this(:)
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine i_oacc_bld_x
@@ -366,11 +366,11 @@ contains
end function i_oacc_get_fmt
- subroutine i_oacc_sync_space(x)
+ subroutine i_oacc_sync_dev_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
- end subroutine i_oacc_sync_space
+ end subroutine i_oacc_sync_dev_space
subroutine i_oacc_sync(x)
implicit none
@@ -444,7 +444,7 @@ contains
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine i_oacc_vect_all
subroutine i_oacc_final_vect_free(x)
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index 9ff100bc..eb9b2b9a 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -15,34 +15,34 @@ module psb_l_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => l_oacc_get_nrows
- procedure, nopass :: get_fmt => l_oacc_get_fmt
-
- procedure, pass(x) :: all => l_oacc_vect_all
- procedure, pass(x) :: zero => l_oacc_zero
- procedure, pass(x) :: asb_m => l_oacc_asb_m
- procedure, pass(x) :: sync => l_oacc_sync
- procedure, pass(x) :: sync_space => l_oacc_sync_space
- procedure, pass(x) :: bld_x => l_oacc_bld_x
- procedure, pass(x) :: bld_mn => l_oacc_bld_mn
- procedure, pass(x) :: free => l_oacc_vect_free
- procedure, pass(x) :: ins_a => l_oacc_ins_a
- procedure, pass(x) :: ins_v => l_oacc_ins_v
- procedure, pass(x) :: is_host => l_oacc_is_host
- procedure, pass(x) :: is_dev => l_oacc_is_dev
- procedure, pass(x) :: is_sync => l_oacc_is_sync
- procedure, pass(x) :: set_host => l_oacc_set_host
- procedure, pass(x) :: set_dev => l_oacc_set_dev
- procedure, pass(x) :: set_sync => l_oacc_set_sync
- procedure, pass(x) :: set_scal => l_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf
- procedure, pass(y) :: sctb => l_oacc_sctb
- procedure, pass(y) :: sctb_x => l_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => l_oacc_get_size
+ procedure, pass(x) :: get_nrows => l_oacc_get_nrows
+ procedure, nopass :: get_fmt => l_oacc_get_fmt
+
+ procedure, pass(x) :: all => l_oacc_vect_all
+ procedure, pass(x) :: zero => l_oacc_zero
+ procedure, pass(x) :: asb_m => l_oacc_asb_m
+ procedure, pass(x) :: sync => l_oacc_sync
+ procedure, pass(x) :: sync_dev_space => l_oacc_sync_dev_space
+ procedure, pass(x) :: bld_x => l_oacc_bld_x
+ procedure, pass(x) :: bld_mn => l_oacc_bld_mn
+ procedure, pass(x) :: free => l_oacc_vect_free
+ procedure, pass(x) :: ins_a => l_oacc_ins_a
+ procedure, pass(x) :: ins_v => l_oacc_ins_v
+ procedure, pass(x) :: is_host => l_oacc_is_host
+ procedure, pass(x) :: is_dev => l_oacc_is_dev
+ procedure, pass(x) :: is_sync => l_oacc_is_sync
+ procedure, pass(x) :: set_host => l_oacc_set_host
+ procedure, pass(x) :: set_dev => l_oacc_set_dev
+ procedure, pass(x) :: set_sync => l_oacc_set_sync
+ procedure, pass(x) :: set_scal => l_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf
+ procedure, pass(y) :: sctb => l_oacc_sctb
+ procedure, pass(y) :: sctb_x => l_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => l_oacc_get_size
final :: l_oacc_final_vect_free
end type psb_l_vect_oacc
@@ -65,7 +65,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
@@ -92,7 +92,7 @@ contains
class(psb_l_vect_oacc) :: y
integer(psb_ipk_) :: info, ni
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -145,7 +145,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -172,7 +172,7 @@ contains
info = 0
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -204,9 +204,9 @@ contains
if (psb_errstatus_fatal()) return
done_oacc = .false.
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
- select type(vval => val)
+ select type(vval => val)
type is (psb_l_vect_oacc)
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
@@ -221,11 +221,11 @@ contains
end select
if (.not.done_oacc) then
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync()
end select
- select type(vval => val)
+ select type(vval => val)
type is (psb_l_vect_oacc)
if (vval%is_dev()) call vval%sync()
end select
@@ -271,7 +271,7 @@ contains
call psb_errpush(info, 'l_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine l_oacc_bld_mn
@@ -293,7 +293,7 @@ contains
end if
x%v(:) = this(:)
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine l_oacc_bld_x
@@ -368,11 +368,11 @@ contains
end function l_oacc_get_fmt
- subroutine l_oacc_sync_space(x)
+ subroutine l_oacc_sync_dev_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
- end subroutine l_oacc_sync_space
+ end subroutine l_oacc_sync_dev_space
subroutine l_oacc_sync(x)
implicit none
@@ -446,7 +446,7 @@ contains
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine l_oacc_vect_all
subroutine l_oacc_final_vect_free(x)
diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90
index b577daac..64a5f9a1 100644
--- a/openacc/psb_s_oacc_csr_mat_mod.F90
+++ b/openacc/psb_s_oacc_csr_mat_mod.F90
@@ -13,32 +13,32 @@ module psb_s_oacc_csr_mat_mod
type, extends(psb_s_csr_sparse_mat) :: psb_s_oacc_csr_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => s_oacc_csr_get_fmt
- procedure, pass(a) :: sizeof => s_oacc_csr_sizeof
- procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv
- procedure, pass(a) :: scals => psb_s_oacc_csr_scals
- procedure, pass(a) :: scalv => psb_s_oacc_csr_scal
- procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt
- procedure, pass(a) :: free => s_oacc_csr_free
- procedure, pass(a) :: mold => psb_s_oacc_csr_mold
- procedure, pass(a) :: all => s_oacc_csr_all
- procedure, pass(a) :: is_host => s_oacc_csr_is_host
- procedure, pass(a) :: is_sync => s_oacc_csr_is_sync
- procedure, pass(a) :: is_dev => s_oacc_csr_is_dev
- procedure, pass(a) :: set_host => s_oacc_csr_set_host
- procedure, pass(a) :: set_sync => s_oacc_csr_set_sync
- procedure, pass(a) :: set_dev => s_oacc_csr_set_dev
- procedure, pass(a) :: free_space => s_oacc_csr_free_space
- procedure, pass(a) :: sync_space => s_oacc_csr_sync_space
- procedure, pass(a) :: sync => s_oacc_csr_sync
+ procedure, nopass :: get_fmt => s_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => s_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_s_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_s_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_s_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_s_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_s_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_s_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => s_oacc_csr_free
+ procedure, pass(a) :: mold => psb_s_oacc_csr_mold
+ procedure, pass(a) :: all => s_oacc_csr_all
+ procedure, pass(a) :: is_host => s_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => s_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => s_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => s_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => s_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => s_oacc_csr_set_dev
+ procedure, pass(a) :: free_dev_space => s_oacc_csr_free_dev_space
+ procedure, pass(a) :: sync_dev_space => s_oacc_csr_sync_dev_space
+ procedure, pass(a) :: sync => s_oacc_csr_sync
end type psb_s_oacc_csr_sparse_mat
interface
@@ -156,7 +156,7 @@ module psb_s_oacc_csr_mat_mod
contains
- subroutine s_oacc_csr_free_space(a)
+ subroutine s_oacc_csr_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
- end subroutine s_oacc_csr_free_space
+ end subroutine s_oacc_csr_free_dev_space
subroutine s_oacc_csr_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_csr_sparse_mat%free()
return
@@ -218,7 +218,7 @@ contains
allocate(a%ja(nz),stat=info)
allocate(a%irp(m+1),stat=info)
if (info == 0) call a%set_host()
- if (info == 0) call a%sync_space()
+ if (info == 0) call a%sync_dev_space()
end subroutine s_oacc_csr_all
function s_oacc_csr_is_host(a) result(res)
@@ -266,13 +266,13 @@ contains
a%devstate = is_dev
end subroutine s_oacc_csr_set_dev
- subroutine s_oacc_csr_sync_space(a)
+ subroutine s_oacc_csr_sync_dev_space(a)
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
- end subroutine s_oacc_csr_sync_space
+ end subroutine s_oacc_csr_sync_dev_space
subroutine s_oacc_csr_sync(a)
implicit none
@@ -280,7 +280,7 @@ contains
class(psb_s_oacc_csr_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
index 793bf353..2893c3e5 100644
--- a/openacc/psb_s_oacc_ell_mat_mod.F90
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_s_oacc_ell_mat_mod
type, extends(psb_s_ell_sparse_mat) :: psb_s_oacc_ell_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => s_oacc_ell_get_fmt
- procedure, pass(a) :: sizeof => s_oacc_ell_sizeof
- procedure, pass(a) :: is_host => s_oacc_ell_is_host
- procedure, pass(a) :: is_sync => s_oacc_ell_is_sync
- procedure, pass(a) :: is_dev => s_oacc_ell_is_dev
- procedure, pass(a) :: set_host => s_oacc_ell_set_host
- procedure, pass(a) :: set_sync => s_oacc_ell_set_sync
- procedure, pass(a) :: set_dev => s_oacc_ell_set_dev
- procedure, pass(a) :: sync_space => s_oacc_ell_sync_space
- procedure, pass(a) :: sync => s_oacc_ell_sync
- procedure, pass(a) :: free_space => s_oacc_ell_free_space
- procedure, pass(a) :: free => s_oacc_ell_free
- procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv
- procedure, pass(a) :: scals => psb_s_oacc_ell_scals
- procedure, pass(a) :: scalv => psb_s_oacc_ell_scal
- procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt
- procedure, pass(a) :: mold => psb_s_oacc_ell_mold
+ procedure, nopass :: get_fmt => s_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => s_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => s_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => s_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => s_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => s_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => s_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => s_oacc_ell_set_dev
+ procedure, pass(a) :: sync_dev_space => s_oacc_ell_sync_dev_space
+ procedure, pass(a) :: sync => s_oacc_ell_sync
+ procedure, pass(a) :: free_dev_space => s_oacc_ell_free_dev_space
+ procedure, pass(a) :: free => s_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_s_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_s_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_s_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_s_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_s_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_s_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_s_oacc_ell_mold
end type psb_s_oacc_ell_sparse_mat
@@ -154,7 +154,7 @@ module psb_s_oacc_ell_mat_mod
contains
- subroutine s_oacc_ell_free_space(a)
+ subroutine s_oacc_ell_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
@@ -166,7 +166,7 @@ contains
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
- end subroutine s_oacc_ell_free_space
+ end subroutine s_oacc_ell_free_dev_space
subroutine s_oacc_ell_free(a)
use psb_base_mod
@@ -174,7 +174,7 @@ contains
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_ell_sparse_mat%free()
return
@@ -195,7 +195,7 @@ contains
end function s_oacc_ell_sizeof
- subroutine s_oacc_ell_sync_space(a)
+ subroutine s_oacc_ell_sync_dev_space(a)
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
@@ -203,7 +203,7 @@ contains
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
- end subroutine s_oacc_ell_sync_space
+ end subroutine s_oacc_ell_sync_dev_space
function s_oacc_ell_is_host(a) result(res)
implicit none
@@ -262,7 +262,7 @@ contains
class(psb_s_oacc_ell_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90
index 508591e4..583a281e 100644
--- a/openacc/psb_s_oacc_hll_mat_mod.F90
+++ b/openacc/psb_s_oacc_hll_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_s_oacc_hll_mat_mod
type, extends(psb_s_hll_sparse_mat) :: psb_s_oacc_hll_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => s_oacc_hll_get_fmt
- procedure, pass(a) :: sizeof => s_oacc_hll_sizeof
- procedure, pass(a) :: is_host => s_oacc_hll_is_host
- procedure, pass(a) :: is_sync => s_oacc_hll_is_sync
- procedure, pass(a) :: is_dev => s_oacc_hll_is_dev
- procedure, pass(a) :: set_host => s_oacc_hll_set_host
- procedure, pass(a) :: set_sync => s_oacc_hll_set_sync
- procedure, pass(a) :: set_dev => s_oacc_hll_set_dev
- procedure, pass(a) :: sync_space => s_oacc_hll_sync_space
- procedure, pass(a) :: sync => s_oacc_hll_sync
- procedure, pass(a) :: free_space => s_oacc_hll_free_space
- procedure, pass(a) :: free => s_oacc_hll_free
- procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv
- procedure, pass(a) :: scals => psb_s_oacc_hll_scals
- procedure, pass(a) :: scalv => psb_s_oacc_hll_scal
- procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt
- procedure, pass(a) :: mold => psb_s_oacc_hll_mold
+ procedure, nopass :: get_fmt => s_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => s_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => s_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => s_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => s_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => s_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => s_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => s_oacc_hll_set_dev
+ procedure, pass(a) :: sync_dev_space => s_oacc_hll_sync_dev_space
+ procedure, pass(a) :: sync => s_oacc_hll_sync
+ procedure, pass(a) :: free_dev_space => s_oacc_hll_free_dev_space
+ procedure, pass(a) :: free => s_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_s_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_s_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_s_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_s_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_s_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_s_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_s_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_s_oacc_hll_mold
end type psb_s_oacc_hll_sparse_mat
@@ -154,7 +154,7 @@ module psb_s_oacc_hll_mat_mod
contains
- subroutine s_oacc_hll_free_space(a)
+ subroutine s_oacc_hll_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
- end subroutine s_oacc_hll_free_space
+ end subroutine s_oacc_hll_free_dev_space
subroutine s_oacc_hll_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_s_hll_sparse_mat%free()
return
@@ -249,7 +249,7 @@ contains
res = 'HLLOA'
end function s_oacc_hll_get_fmt
- subroutine s_oacc_hll_sync_space(a)
+ subroutine s_oacc_hll_sync_dev_space(a)
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
@@ -258,7 +258,7 @@ contains
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
- end subroutine s_oacc_hll_sync_space
+ end subroutine s_oacc_hll_sync_dev_space
subroutine s_oacc_hll_sync(a)
@@ -267,7 +267,7 @@ contains
class(psb_s_oacc_hll_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 7ce4292f..16b45461 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -15,50 +15,50 @@ module psb_s_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => s_oacc_get_nrows
- procedure, nopass :: get_fmt => s_oacc_get_fmt
-
- procedure, pass(x) :: all => s_oacc_vect_all
- procedure, pass(x) :: zero => s_oacc_zero
- procedure, pass(x) :: asb_m => s_oacc_asb_m
- procedure, pass(x) :: sync => s_oacc_sync
- procedure, pass(x) :: sync_space => s_oacc_sync_space
- procedure, pass(x) :: bld_x => s_oacc_bld_x
- procedure, pass(x) :: bld_mn => s_oacc_bld_mn
- procedure, pass(x) :: free => s_oacc_vect_free
- procedure, pass(x) :: ins_a => s_oacc_ins_a
- procedure, pass(x) :: ins_v => s_oacc_ins_v
- procedure, pass(x) :: is_host => s_oacc_is_host
- procedure, pass(x) :: is_dev => s_oacc_is_dev
- procedure, pass(x) :: is_sync => s_oacc_is_sync
- procedure, pass(x) :: set_host => s_oacc_set_host
- procedure, pass(x) :: set_dev => s_oacc_set_dev
- procedure, pass(x) :: set_sync => s_oacc_set_sync
- procedure, pass(x) :: set_scal => s_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf
- procedure, pass(y) :: sctb => s_oacc_sctb
- procedure, pass(y) :: sctb_x => s_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => s_oacc_get_size
-
- procedure, pass(x) :: dot_v => s_oacc_vect_dot
- procedure, pass(x) :: dot_a => s_oacc_dot_a
- procedure, pass(y) :: axpby_v => s_oacc_axpby_v
- procedure, pass(y) :: axpby_a => s_oacc_axpby_a
- procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz
- procedure, pass(y) :: mlt_a => s_oacc_mlt_a
- procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2
- procedure, pass(x) :: scal => s_oacc_scal
- procedure, pass(x) :: nrm2 => s_oacc_nrm2
- procedure, pass(x) :: amax => s_oacc_amax
- procedure, pass(x) :: asum => s_oacc_asum
- procedure, pass(x) :: absval1 => s_oacc_absval1
- procedure, pass(x) :: absval2 => s_oacc_absval2
+ procedure, pass(x) :: get_nrows => s_oacc_get_nrows
+ procedure, nopass :: get_fmt => s_oacc_get_fmt
+
+ procedure, pass(x) :: all => s_oacc_vect_all
+ procedure, pass(x) :: zero => s_oacc_zero
+ procedure, pass(x) :: asb_m => s_oacc_asb_m
+ procedure, pass(x) :: sync => s_oacc_sync
+ procedure, pass(x) :: sync_dev_space => s_oacc_sync_dev_space
+ procedure, pass(x) :: bld_x => s_oacc_bld_x
+ procedure, pass(x) :: bld_mn => s_oacc_bld_mn
+ procedure, pass(x) :: free => s_oacc_vect_free
+ procedure, pass(x) :: ins_a => s_oacc_ins_a
+ procedure, pass(x) :: ins_v => s_oacc_ins_v
+ procedure, pass(x) :: is_host => s_oacc_is_host
+ procedure, pass(x) :: is_dev => s_oacc_is_dev
+ procedure, pass(x) :: is_sync => s_oacc_is_sync
+ procedure, pass(x) :: set_host => s_oacc_set_host
+ procedure, pass(x) :: set_dev => s_oacc_set_dev
+ procedure, pass(x) :: set_sync => s_oacc_set_sync
+ procedure, pass(x) :: set_scal => s_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf
+ procedure, pass(y) :: sctb => s_oacc_sctb
+ procedure, pass(y) :: sctb_x => s_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => s_oacc_get_size
+
+ procedure, pass(x) :: dot_v => s_oacc_vect_dot
+ procedure, pass(x) :: dot_a => s_oacc_dot_a
+ procedure, pass(y) :: axpby_v => s_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => s_oacc_axpby_a
+ procedure, pass(z) :: upd_xyz => s_oacc_upd_xyz
+ procedure, pass(y) :: mlt_a => s_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => s_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => psb_s_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_s_oacc_mlt_v_2
+ procedure, pass(x) :: scal => s_oacc_scal
+ procedure, pass(x) :: nrm2 => s_oacc_nrm2
+ procedure, pass(x) :: amax => s_oacc_amax
+ procedure, pass(x) :: asum => s_oacc_asum
+ procedure, pass(x) :: absval1 => s_oacc_absval1
+ procedure, pass(x) :: absval2 => s_oacc_absval2
final :: s_oacc_final_vect_free
end type psb_s_vect_oacc
@@ -117,7 +117,7 @@ contains
integer(psb_ipk_) :: i
n = min(size(x%v), size(y%v))
- select type (yy => y)
+ select type (yy => y)
class is (psb_s_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -287,7 +287,7 @@ contains
info = psb_success_
- select type(xx => x)
+ select type(xx => x)
type is (psb_s_vect_oacc)
if ((beta /= szero) .and. y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
@@ -354,11 +354,11 @@ contains
info = psb_success_
gpu_done = .false.
- select type(xx => x)
+ select type(xx => x)
class is (psb_s_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
class is (psb_s_vect_oacc)
- select type(zz => z)
+ select type(zz => z)
class is (psb_s_vect_oacc)
if ((beta /= szero) .and. yy%is_host()) call yy%sync()
if ((delta /= szero) .and. zz%is_host()) call zz%sync()
@@ -405,7 +405,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
@@ -432,7 +432,7 @@ contains
class(psb_s_vect_oacc) :: y
integer(psb_ipk_) :: info, ni
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -485,7 +485,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -512,7 +512,7 @@ contains
info = 0
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -544,9 +544,9 @@ contains
if (psb_errstatus_fatal()) return
done_oacc = .false.
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
- select type(vval => val)
+ select type(vval => val)
type is (psb_s_vect_oacc)
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
@@ -561,11 +561,11 @@ contains
end select
if (.not.done_oacc) then
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync()
end select
- select type(vval => val)
+ select type(vval => val)
type is (psb_s_vect_oacc)
if (vval%is_dev()) call vval%sync()
end select
@@ -611,7 +611,7 @@ contains
call psb_errpush(info, 's_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine s_oacc_bld_mn
@@ -633,7 +633,7 @@ contains
end if
x%v(:) = this(:)
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine s_oacc_bld_x
@@ -719,7 +719,7 @@ contains
res = szero
!write(0,*) 'dot_v'
- select type(yy => y)
+ select type(yy => y)
type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
@@ -762,11 +762,11 @@ contains
end function s_oacc_dot_a
- subroutine s_oacc_sync_space(x)
+ subroutine s_oacc_sync_dev_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
- end subroutine s_oacc_sync_space
+ end subroutine s_oacc_sync_dev_space
subroutine s_oacc_sync(x)
implicit none
@@ -840,7 +840,7 @@ contains
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine s_oacc_vect_all
subroutine s_oacc_final_vect_free(x)
diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90
index dbafb391..47973825 100644
--- a/openacc/psb_z_oacc_csr_mat_mod.F90
+++ b/openacc/psb_z_oacc_csr_mat_mod.F90
@@ -13,32 +13,32 @@ module psb_z_oacc_csr_mat_mod
type, extends(psb_z_csr_sparse_mat) :: psb_z_oacc_csr_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => z_oacc_csr_get_fmt
- procedure, pass(a) :: sizeof => z_oacc_csr_sizeof
- procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv
- procedure, pass(a) :: scals => psb_z_oacc_csr_scals
- procedure, pass(a) :: scalv => psb_z_oacc_csr_scal
- procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt
- procedure, pass(a) :: free => z_oacc_csr_free
- procedure, pass(a) :: mold => psb_z_oacc_csr_mold
- procedure, pass(a) :: all => z_oacc_csr_all
- procedure, pass(a) :: is_host => z_oacc_csr_is_host
- procedure, pass(a) :: is_sync => z_oacc_csr_is_sync
- procedure, pass(a) :: is_dev => z_oacc_csr_is_dev
- procedure, pass(a) :: set_host => z_oacc_csr_set_host
- procedure, pass(a) :: set_sync => z_oacc_csr_set_sync
- procedure, pass(a) :: set_dev => z_oacc_csr_set_dev
- procedure, pass(a) :: free_space => z_oacc_csr_free_space
- procedure, pass(a) :: sync_space => z_oacc_csr_sync_space
- procedure, pass(a) :: sync => z_oacc_csr_sync
+ procedure, nopass :: get_fmt => z_oacc_csr_get_fmt
+ procedure, pass(a) :: sizeof => z_oacc_csr_sizeof
+ procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm
+ procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv
+ procedure, pass(a) :: scals => psb_z_oacc_csr_scals
+ procedure, pass(a) :: scalv => psb_z_oacc_csr_scal
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_z_oacc_csr_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_z_oacc_csr_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_z_oacc_csr_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_z_oacc_csr_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_z_oacc_csr_mv_from_fmt
+ procedure, pass(a) :: free => z_oacc_csr_free
+ procedure, pass(a) :: mold => psb_z_oacc_csr_mold
+ procedure, pass(a) :: all => z_oacc_csr_all
+ procedure, pass(a) :: is_host => z_oacc_csr_is_host
+ procedure, pass(a) :: is_sync => z_oacc_csr_is_sync
+ procedure, pass(a) :: is_dev => z_oacc_csr_is_dev
+ procedure, pass(a) :: set_host => z_oacc_csr_set_host
+ procedure, pass(a) :: set_sync => z_oacc_csr_set_sync
+ procedure, pass(a) :: set_dev => z_oacc_csr_set_dev
+ procedure, pass(a) :: free_dev_space => z_oacc_csr_free_dev_space
+ procedure, pass(a) :: sync_dev_space => z_oacc_csr_sync_dev_space
+ procedure, pass(a) :: sync => z_oacc_csr_sync
end type psb_z_oacc_csr_sparse_mat
interface
@@ -156,7 +156,7 @@ module psb_z_oacc_csr_mat_mod
contains
- subroutine z_oacc_csr_free_space(a)
+ subroutine z_oacc_csr_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%irp)) call acc_delete_finalize(a%irp)
return
- end subroutine z_oacc_csr_free_space
+ end subroutine z_oacc_csr_free_dev_space
subroutine z_oacc_csr_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_csr_sparse_mat%free()
return
@@ -218,7 +218,7 @@ contains
allocate(a%ja(nz),stat=info)
allocate(a%irp(m+1),stat=info)
if (info == 0) call a%set_host()
- if (info == 0) call a%sync_space()
+ if (info == 0) call a%sync_dev_space()
end subroutine z_oacc_csr_all
function z_oacc_csr_is_host(a) result(res)
@@ -266,13 +266,13 @@ contains
a%devstate = is_dev
end subroutine z_oacc_csr_set_dev
- subroutine z_oacc_csr_sync_space(a)
+ subroutine z_oacc_csr_sync_dev_space(a)
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
if (allocated(a%val)) call acc_create(a%val)
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irp)) call acc_create(a%irp)
- end subroutine z_oacc_csr_sync_space
+ end subroutine z_oacc_csr_sync_dev_space
subroutine z_oacc_csr_sync(a)
implicit none
@@ -280,7 +280,7 @@ contains
class(psb_z_oacc_csr_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
index 76b0182d..4013ee9d 100644
--- a/openacc/psb_z_oacc_ell_mat_mod.F90
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_z_oacc_ell_mat_mod
type, extends(psb_z_ell_sparse_mat) :: psb_z_oacc_ell_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => z_oacc_ell_get_fmt
- procedure, pass(a) :: sizeof => z_oacc_ell_sizeof
- procedure, pass(a) :: is_host => z_oacc_ell_is_host
- procedure, pass(a) :: is_sync => z_oacc_ell_is_sync
- procedure, pass(a) :: is_dev => z_oacc_ell_is_dev
- procedure, pass(a) :: set_host => z_oacc_ell_set_host
- procedure, pass(a) :: set_sync => z_oacc_ell_set_sync
- procedure, pass(a) :: set_dev => z_oacc_ell_set_dev
- procedure, pass(a) :: sync_space => z_oacc_ell_sync_space
- procedure, pass(a) :: sync => z_oacc_ell_sync
- procedure, pass(a) :: free_space => z_oacc_ell_free_space
- procedure, pass(a) :: free => z_oacc_ell_free
- procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv
- procedure, pass(a) :: scals => psb_z_oacc_ell_scals
- procedure, pass(a) :: scalv => psb_z_oacc_ell_scal
- procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt
- procedure, pass(a) :: mold => psb_z_oacc_ell_mold
+ procedure, nopass :: get_fmt => z_oacc_ell_get_fmt
+ procedure, pass(a) :: sizeof => z_oacc_ell_sizeof
+ procedure, pass(a) :: is_host => z_oacc_ell_is_host
+ procedure, pass(a) :: is_sync => z_oacc_ell_is_sync
+ procedure, pass(a) :: is_dev => z_oacc_ell_is_dev
+ procedure, pass(a) :: set_host => z_oacc_ell_set_host
+ procedure, pass(a) :: set_sync => z_oacc_ell_set_sync
+ procedure, pass(a) :: set_dev => z_oacc_ell_set_dev
+ procedure, pass(a) :: sync_dev_space => z_oacc_ell_sync_dev_space
+ procedure, pass(a) :: sync => z_oacc_ell_sync
+ procedure, pass(a) :: free_dev_space => z_oacc_ell_free_dev_space
+ procedure, pass(a) :: free => z_oacc_ell_free
+ procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm
+ procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv
+ procedure, pass(a) :: scals => psb_z_oacc_ell_scals
+ procedure, pass(a) :: scalv => psb_z_oacc_ell_scal
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_z_oacc_ell_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_z_oacc_ell_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_z_oacc_ell_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_z_oacc_ell_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_z_oacc_ell_mv_from_fmt
+ procedure, pass(a) :: mold => psb_z_oacc_ell_mold
end type psb_z_oacc_ell_sparse_mat
@@ -154,7 +154,7 @@ module psb_z_oacc_ell_mat_mod
contains
- subroutine z_oacc_ell_free_space(a)
+ subroutine z_oacc_ell_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
@@ -166,7 +166,7 @@ contains
if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
return
- end subroutine z_oacc_ell_free_space
+ end subroutine z_oacc_ell_free_dev_space
subroutine z_oacc_ell_free(a)
use psb_base_mod
@@ -174,7 +174,7 @@ contains
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_ell_sparse_mat%free()
return
@@ -195,7 +195,7 @@ contains
end function z_oacc_ell_sizeof
- subroutine z_oacc_ell_sync_space(a)
+ subroutine z_oacc_ell_sync_dev_space(a)
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
@@ -203,7 +203,7 @@ contains
if (allocated(a%ja)) call acc_create(a%ja)
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
- end subroutine z_oacc_ell_sync_space
+ end subroutine z_oacc_ell_sync_dev_space
function z_oacc_ell_is_host(a) result(res)
implicit none
@@ -262,7 +262,7 @@ contains
class(psb_z_oacc_ell_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90
index 4a657fd1..289b78ac 100644
--- a/openacc/psb_z_oacc_hll_mat_mod.F90
+++ b/openacc/psb_z_oacc_hll_mat_mod.F90
@@ -12,31 +12,31 @@ module psb_z_oacc_hll_mat_mod
type, extends(psb_z_hll_sparse_mat) :: psb_z_oacc_hll_sparse_mat
integer(psb_ipk_) :: devstate = is_host
contains
- procedure, nopass :: get_fmt => z_oacc_hll_get_fmt
- procedure, pass(a) :: sizeof => z_oacc_hll_sizeof
- procedure, pass(a) :: is_host => z_oacc_hll_is_host
- procedure, pass(a) :: is_sync => z_oacc_hll_is_sync
- procedure, pass(a) :: is_dev => z_oacc_hll_is_dev
- procedure, pass(a) :: set_host => z_oacc_hll_set_host
- procedure, pass(a) :: set_sync => z_oacc_hll_set_sync
- procedure, pass(a) :: set_dev => z_oacc_hll_set_dev
- procedure, pass(a) :: sync_space => z_oacc_hll_sync_space
- procedure, pass(a) :: sync => z_oacc_hll_sync
- procedure, pass(a) :: free_space => z_oacc_hll_free_space
- procedure, pass(a) :: free => z_oacc_hll_free
- procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv
- procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv
- procedure, pass(a) :: scals => psb_z_oacc_hll_scals
- procedure, pass(a) :: scalv => psb_z_oacc_hll_scal
- procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz
- procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz
- procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo
- procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt
- procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo
- procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt
- procedure, pass(a) :: mold => psb_z_oacc_hll_mold
+ procedure, nopass :: get_fmt => z_oacc_hll_get_fmt
+ procedure, pass(a) :: sizeof => z_oacc_hll_sizeof
+ procedure, pass(a) :: is_host => z_oacc_hll_is_host
+ procedure, pass(a) :: is_sync => z_oacc_hll_is_sync
+ procedure, pass(a) :: is_dev => z_oacc_hll_is_dev
+ procedure, pass(a) :: set_host => z_oacc_hll_set_host
+ procedure, pass(a) :: set_sync => z_oacc_hll_set_sync
+ procedure, pass(a) :: set_dev => z_oacc_hll_set_dev
+ procedure, pass(a) :: sync_dev_space => z_oacc_hll_sync_dev_space
+ procedure, pass(a) :: sync => z_oacc_hll_sync
+ procedure, pass(a) :: free_dev_space => z_oacc_hll_free_dev_space
+ procedure, pass(a) :: free => z_oacc_hll_free
+ procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv
+ procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv
+ procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm
+ procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv
+ procedure, pass(a) :: scals => psb_z_oacc_hll_scals
+ procedure, pass(a) :: scalv => psb_z_oacc_hll_scal
+ procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz
+ procedure, pass(a) :: allocate_mnnz => psb_z_oacc_hll_allocate_mnnz
+ procedure, pass(a) :: cp_from_coo => psb_z_oacc_hll_cp_from_coo
+ procedure, pass(a) :: cp_from_fmt => psb_z_oacc_hll_cp_from_fmt
+ procedure, pass(a) :: mv_from_coo => psb_z_oacc_hll_mv_from_coo
+ procedure, pass(a) :: mv_from_fmt => psb_z_oacc_hll_mv_from_fmt
+ procedure, pass(a) :: mold => psb_z_oacc_hll_mold
end type psb_z_oacc_hll_sparse_mat
@@ -154,7 +154,7 @@ module psb_z_oacc_hll_mat_mod
contains
- subroutine z_oacc_hll_free_space(a)
+ subroutine z_oacc_hll_free_dev_space(a)
use psb_base_mod
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
@@ -167,7 +167,7 @@ contains
if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
return
- end subroutine z_oacc_hll_free_space
+ end subroutine z_oacc_hll_free_dev_space
subroutine z_oacc_hll_free(a)
use psb_base_mod
@@ -175,7 +175,7 @@ contains
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- call a%free_space()
+ call a%free_dev_space()
call a%psb_z_hll_sparse_mat%free()
return
@@ -249,7 +249,7 @@ contains
res = 'HLLOA'
end function z_oacc_hll_get_fmt
- subroutine z_oacc_hll_sync_space(a)
+ subroutine z_oacc_hll_sync_dev_space(a)
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
@@ -258,7 +258,7 @@ contains
if (allocated(a%irn)) call acc_create(a%irn)
if (allocated(a%idiag)) call acc_create(a%idiag)
if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
- end subroutine z_oacc_hll_sync_space
+ end subroutine z_oacc_hll_sync_dev_space
subroutine z_oacc_hll_sync(a)
@@ -267,7 +267,7 @@ contains
class(psb_z_oacc_hll_sparse_mat), pointer :: tmpa
integer(psb_ipk_) :: info
- tmpa => a
+ tmpa => a
if (a%is_dev()) then
call acc_update_self(a%val)
call acc_update_self(a%ja)
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 5d6f07be..9e6bbb2d 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -15,50 +15,50 @@ module psb_z_oacc_vect_mod
integer :: state = is_host
contains
- procedure, pass(x) :: get_nrows => z_oacc_get_nrows
- procedure, nopass :: get_fmt => z_oacc_get_fmt
-
- procedure, pass(x) :: all => z_oacc_vect_all
- procedure, pass(x) :: zero => z_oacc_zero
- procedure, pass(x) :: asb_m => z_oacc_asb_m
- procedure, pass(x) :: sync => z_oacc_sync
- procedure, pass(x) :: sync_space => z_oacc_sync_space
- procedure, pass(x) :: bld_x => z_oacc_bld_x
- procedure, pass(x) :: bld_mn => z_oacc_bld_mn
- procedure, pass(x) :: free => z_oacc_vect_free
- procedure, pass(x) :: ins_a => z_oacc_ins_a
- procedure, pass(x) :: ins_v => z_oacc_ins_v
- procedure, pass(x) :: is_host => z_oacc_is_host
- procedure, pass(x) :: is_dev => z_oacc_is_dev
- procedure, pass(x) :: is_sync => z_oacc_is_sync
- procedure, pass(x) :: set_host => z_oacc_set_host
- procedure, pass(x) :: set_dev => z_oacc_set_dev
- procedure, pass(x) :: set_sync => z_oacc_set_sync
- procedure, pass(x) :: set_scal => z_oacc_set_scal
-
- procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf
- procedure, pass(y) :: sctb => z_oacc_sctb
- procedure, pass(y) :: sctb_x => z_oacc_sctb_x
- procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf
-
- procedure, pass(x) :: get_size => z_oacc_get_size
-
- procedure, pass(x) :: dot_v => z_oacc_vect_dot
- procedure, pass(x) :: dot_a => z_oacc_dot_a
- procedure, pass(y) :: axpby_v => z_oacc_axpby_v
- procedure, pass(y) :: axpby_a => z_oacc_axpby_a
- procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz
- procedure, pass(y) :: mlt_a => z_oacc_mlt_a
- procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2
- procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v
- procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2
- procedure, pass(x) :: scal => z_oacc_scal
- procedure, pass(x) :: nrm2 => z_oacc_nrm2
- procedure, pass(x) :: amax => z_oacc_amax
- procedure, pass(x) :: asum => z_oacc_asum
- procedure, pass(x) :: absval1 => z_oacc_absval1
- procedure, pass(x) :: absval2 => z_oacc_absval2
+ procedure, pass(x) :: get_nrows => z_oacc_get_nrows
+ procedure, nopass :: get_fmt => z_oacc_get_fmt
+
+ procedure, pass(x) :: all => z_oacc_vect_all
+ procedure, pass(x) :: zero => z_oacc_zero
+ procedure, pass(x) :: asb_m => z_oacc_asb_m
+ procedure, pass(x) :: sync => z_oacc_sync
+ procedure, pass(x) :: sync_dev_space => z_oacc_sync_dev_space
+ procedure, pass(x) :: bld_x => z_oacc_bld_x
+ procedure, pass(x) :: bld_mn => z_oacc_bld_mn
+ procedure, pass(x) :: free => z_oacc_vect_free
+ procedure, pass(x) :: ins_a => z_oacc_ins_a
+ procedure, pass(x) :: ins_v => z_oacc_ins_v
+ procedure, pass(x) :: is_host => z_oacc_is_host
+ procedure, pass(x) :: is_dev => z_oacc_is_dev
+ procedure, pass(x) :: is_sync => z_oacc_is_sync
+ procedure, pass(x) :: set_host => z_oacc_set_host
+ procedure, pass(x) :: set_dev => z_oacc_set_dev
+ procedure, pass(x) :: set_sync => z_oacc_set_sync
+ procedure, pass(x) :: set_scal => z_oacc_set_scal
+
+ procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x
+ procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf
+ procedure, pass(y) :: sctb => z_oacc_sctb
+ procedure, pass(y) :: sctb_x => z_oacc_sctb_x
+ procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf
+
+ procedure, pass(x) :: get_size => z_oacc_get_size
+
+ procedure, pass(x) :: dot_v => z_oacc_vect_dot
+ procedure, pass(x) :: dot_a => z_oacc_dot_a
+ procedure, pass(y) :: axpby_v => z_oacc_axpby_v
+ procedure, pass(y) :: axpby_a => z_oacc_axpby_a
+ procedure, pass(z) :: upd_xyz => z_oacc_upd_xyz
+ procedure, pass(y) :: mlt_a => z_oacc_mlt_a
+ procedure, pass(z) :: mlt_a_2 => z_oacc_mlt_a_2
+ procedure, pass(y) :: mlt_v => psb_z_oacc_mlt_v
+ procedure, pass(z) :: mlt_v_2 => psb_z_oacc_mlt_v_2
+ procedure, pass(x) :: scal => z_oacc_scal
+ procedure, pass(x) :: nrm2 => z_oacc_nrm2
+ procedure, pass(x) :: amax => z_oacc_amax
+ procedure, pass(x) :: asum => z_oacc_asum
+ procedure, pass(x) :: absval1 => z_oacc_absval1
+ procedure, pass(x) :: absval2 => z_oacc_absval2
final :: z_oacc_final_vect_free
end type psb_z_vect_oacc
@@ -117,7 +117,7 @@ contains
integer(psb_ipk_) :: i
n = min(size(x%v), size(y%v))
- select type (yy => y)
+ select type (yy => y)
class is (psb_z_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -287,7 +287,7 @@ contains
info = psb_success_
- select type(xx => x)
+ select type(xx => x)
type is (psb_z_vect_oacc)
if ((beta /= zzero) .and. y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
@@ -354,11 +354,11 @@ contains
info = psb_success_
gpu_done = .false.
- select type(xx => x)
+ select type(xx => x)
class is (psb_z_vect_oacc)
- select type(yy => y)
+ select type(yy => y)
class is (psb_z_vect_oacc)
- select type(zz => z)
+ select type(zz => z)
class is (psb_z_vect_oacc)
if ((beta /= zzero) .and. yy%is_host()) call yy%sync()
if ((delta /= zzero) .and. zz%is_host()) call zz%sync()
@@ -405,7 +405,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
@@ -432,7 +432,7 @@ contains
class(psb_z_vect_oacc) :: y
integer(psb_ipk_) :: info, ni
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -485,7 +485,7 @@ contains
return
end if
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -512,7 +512,7 @@ contains
info = 0
- select type(ii => idx)
+ select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
class default
@@ -544,9 +544,9 @@ contains
if (psb_errstatus_fatal()) return
done_oacc = .false.
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
- select type(vval => val)
+ select type(vval => val)
type is (psb_z_vect_oacc)
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
@@ -561,11 +561,11 @@ contains
end select
if (.not.done_oacc) then
- select type(virl => irl)
+ select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync()
end select
- select type(vval => val)
+ select type(vval => val)
type is (psb_z_vect_oacc)
if (vval%is_dev()) call vval%sync()
end select
@@ -611,7 +611,7 @@ contains
call psb_errpush(info, 'z_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine z_oacc_bld_mn
@@ -633,7 +633,7 @@ contains
end if
x%v(:) = this(:)
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine z_oacc_bld_x
@@ -719,7 +719,7 @@ contains
res = zzero
!write(0,*) 'dot_v'
- select type(yy => y)
+ select type(yy => y)
type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n, x%v, 1, yy%v, 1)
@@ -762,11 +762,11 @@ contains
end function z_oacc_dot_a
- subroutine z_oacc_sync_space(x)
+ subroutine z_oacc_sync_dev_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (allocated(x%v)) call acc_create(x%v)
- end subroutine z_oacc_sync_space
+ end subroutine z_oacc_sync_dev_space
subroutine z_oacc_sync(x)
implicit none
@@ -840,7 +840,7 @@ contains
i_err=(/n, n, n, n, n/))
end if
call x%set_host()
- call x%sync_space()
+ call x%sync_dev_space()
end subroutine z_oacc_vect_all
subroutine z_oacc_final_vect_free(x)
From f783478df3a7d5398515cc30c52bef42743f9296 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 30 Aug 2024 16:03:04 +0200
Subject: [PATCH 36/86] Merge updates from V4
---
openacc/impl/Makefile | 136 +++++++++++++++++++-----
openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 37 +++++--
openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 41 +++++--
openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 37 +++++--
openacc/impl/psb_c_oacc_mlt_v_2.f90 | 37 ++-----
openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 37 +++++--
openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 41 +++++--
openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 37 +++++--
openacc/impl/psb_d_oacc_mlt_v_2.f90 | 37 ++-----
openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 37 +++++--
openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 41 +++++--
openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 37 +++++--
openacc/impl/psb_s_oacc_mlt_v_2.f90 | 37 ++-----
openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 37 +++++--
openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 41 +++++--
openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 37 +++++--
openacc/impl/psb_z_oacc_mlt_v_2.f90 | 37 ++-----
openacc/psb_c_oacc_csr_mat_mod.F90 | 23 ----
openacc/psb_c_oacc_ell_mat_mod.F90 | 22 ----
openacc/psb_c_oacc_hll_mat_mod.F90 | 22 ----
openacc/psb_d_oacc_csr_mat_mod.F90 | 23 ----
openacc/psb_d_oacc_ell_mat_mod.F90 | 22 ----
openacc/psb_d_oacc_hll_mat_mod.F90 | 22 ----
openacc/psb_s_oacc_csr_mat_mod.F90 | 23 ----
openacc/psb_s_oacc_ell_mat_mod.F90 | 22 ----
openacc/psb_s_oacc_hll_mat_mod.F90 | 22 ----
openacc/psb_z_oacc_csr_mat_mod.F90 | 23 ----
openacc/psb_z_oacc_ell_mat_mod.F90 | 22 ----
openacc/psb_z_oacc_hll_mat_mod.F90 | 22 ----
29 files changed, 512 insertions(+), 500 deletions(-)
diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile
index 244f23b1..e460598b 100755
--- a/openacc/impl/Makefile
+++ b/openacc/impl/Makefile
@@ -13,8 +13,6 @@ LIBNAME=libpsb_openacc.a
OBJS= \
psb_s_oacc_csr_vect_mv.o \
psb_s_oacc_csr_inner_vect_sv.o \
-psb_s_oacc_csr_csmm.o \
-psb_s_oacc_csr_csmv.o \
psb_s_oacc_csr_scals.o \
psb_s_oacc_csr_scal.o \
psb_s_oacc_csr_allocate_mnnz.o \
@@ -28,8 +26,6 @@ psb_s_oacc_mlt_v_2.o \
psb_s_oacc_mlt_v.o \
psb_d_oacc_csr_vect_mv.o \
psb_d_oacc_csr_inner_vect_sv.o \
-psb_d_oacc_csr_csmm.o \
-psb_d_oacc_csr_csmv.o \
psb_d_oacc_csr_scals.o \
psb_d_oacc_csr_scal.o \
psb_d_oacc_csr_allocate_mnnz.o \
@@ -43,8 +39,6 @@ psb_d_oacc_mlt_v_2.o \
psb_d_oacc_mlt_v.o \
psb_c_oacc_csr_vect_mv.o \
psb_c_oacc_csr_inner_vect_sv.o \
-psb_c_oacc_csr_csmm.o \
-psb_c_oacc_csr_csmv.o \
psb_c_oacc_csr_scals.o \
psb_c_oacc_csr_scal.o \
psb_c_oacc_csr_allocate_mnnz.o \
@@ -58,8 +52,6 @@ psb_c_oacc_mlt_v_2.o \
psb_c_oacc_mlt_v.o \
psb_z_oacc_csr_vect_mv.o \
psb_z_oacc_csr_inner_vect_sv.o \
-psb_z_oacc_csr_csmm.o \
-psb_z_oacc_csr_csmv.o \
psb_z_oacc_csr_scals.o \
psb_z_oacc_csr_scal.o \
psb_z_oacc_csr_allocate_mnnz.o \
@@ -73,8 +65,6 @@ psb_z_oacc_mlt_v_2.o \
psb_z_oacc_mlt_v.o \
psb_s_oacc_ell_vect_mv.o \
psb_s_oacc_ell_inner_vect_sv.o \
-psb_s_oacc_ell_csmm.o \
-psb_s_oacc_ell_csmv.o \
psb_s_oacc_ell_scals.o \
psb_s_oacc_ell_scal.o \
psb_s_oacc_ell_reallocate_nz.o \
@@ -93,14 +83,10 @@ psb_s_oacc_hll_allocate_mnnz.o \
psb_s_oacc_hll_reallocate_nz.o \
psb_s_oacc_hll_scal.o \
psb_s_oacc_hll_scals.o \
-psb_s_oacc_hll_csmv.o \
-psb_s_oacc_hll_csmm.o \
psb_s_oacc_hll_inner_vect_sv.o \
psb_s_oacc_hll_vect_mv.o \
psb_d_oacc_ell_vect_mv.o \
psb_d_oacc_ell_inner_vect_sv.o \
-psb_d_oacc_ell_csmm.o \
-psb_d_oacc_ell_csmv.o \
psb_d_oacc_ell_scals.o \
psb_d_oacc_ell_scal.o \
psb_d_oacc_ell_reallocate_nz.o \
@@ -119,14 +105,10 @@ psb_d_oacc_hll_allocate_mnnz.o \
psb_d_oacc_hll_reallocate_nz.o \
psb_d_oacc_hll_scal.o \
psb_d_oacc_hll_scals.o \
-psb_d_oacc_hll_csmv.o \
-psb_d_oacc_hll_csmm.o \
psb_d_oacc_hll_inner_vect_sv.o \
psb_d_oacc_hll_vect_mv.o \
psb_c_oacc_ell_vect_mv.o \
psb_c_oacc_ell_inner_vect_sv.o \
-psb_c_oacc_ell_csmm.o \
-psb_c_oacc_ell_csmv.o \
psb_c_oacc_ell_scals.o \
psb_c_oacc_ell_scal.o \
psb_c_oacc_ell_reallocate_nz.o \
@@ -145,14 +127,10 @@ psb_c_oacc_hll_allocate_mnnz.o \
psb_c_oacc_hll_reallocate_nz.o \
psb_c_oacc_hll_scal.o \
psb_c_oacc_hll_scals.o \
-psb_c_oacc_hll_csmv.o \
-psb_c_oacc_hll_csmm.o \
psb_c_oacc_hll_inner_vect_sv.o \
psb_c_oacc_hll_vect_mv.o \
psb_z_oacc_ell_vect_mv.o \
psb_z_oacc_ell_inner_vect_sv.o \
-psb_z_oacc_ell_csmm.o \
-psb_z_oacc_ell_csmv.o \
psb_z_oacc_ell_scals.o \
psb_z_oacc_ell_scal.o \
psb_z_oacc_ell_reallocate_nz.o \
@@ -171,25 +149,127 @@ psb_z_oacc_hll_allocate_mnnz.o \
psb_z_oacc_hll_reallocate_nz.o \
psb_z_oacc_hll_scal.o \
psb_z_oacc_hll_scals.o \
-psb_z_oacc_hll_csmv.o \
-psb_z_oacc_hll_csmm.o \
+psb_z_oacc_hll_inner_vect_sv.o \
+psb_z_oacc_hll_vect_mv.o \
+psb_z_oacc_ell_vect_mv.o \
+psb_z_oacc_ell_inner_vect_sv.o \
+psb_z_oacc_ell_scals.o \
+psb_z_oacc_ell_scal.o \
+psb_z_oacc_ell_reallocate_nz.o \
+psb_z_oacc_ell_allocate_mnnz.o \
+psb_z_oacc_ell_cp_from_coo.o \
+psb_z_oacc_ell_cp_from_fmt.o \
+psb_z_oacc_ell_mv_from_coo.o \
+psb_z_oacc_ell_mv_from_fmt.o \
+psb_z_oacc_ell_mold.o \
+psb_z_oacc_hll_mold.o \
+psb_z_oacc_hll_mv_from_fmt.o \
+psb_z_oacc_hll_mv_from_coo.o \
+psb_z_oacc_hll_cp_from_fmt.o \
+psb_z_oacc_hll_cp_from_coo.o \
+psb_z_oacc_hll_allocate_mnnz.o \
+psb_z_oacc_hll_reallocate_nz.o \
+psb_z_oacc_hll_scal.o \
+psb_z_oacc_hll_scals.o \
psb_z_oacc_hll_inner_vect_sv.o \
psb_z_oacc_hll_vect_mv.o
+
objs: $(OBJS)
lib: objs
ar cur ../$(LIBNAME) $(OBJS)
+psb_s_oacc_csr_vect_mv.o psb_s_oacc_csr_inner_vect_sv.o \
+ psb_s_oacc_csr_scals.o \
+ psb_s_oacc_csr_scal.o psb_s_oacc_csr_allocate_mnnz.o \
+ psb_s_oacc_csr_reallocate_nz.o psb_s_oacc_csr_cp_from_coo.o \
+ psb_s_oacc_csr_cp_from_fmt.o psb_s_oacc_csr_mv_from_coo.o \
+ psb_s_oacc_csr_mv_from_fmt.o psb_s_oacc_csr_mold.o: $(UP)/psb_s_oacc_csr_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
-#psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
- psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \
+psb_s_oacc_ell_vect_mv.o psb_s_oacc_ell_inner_vect_sv.o \
+ psb_s_oacc_ell_scals.o \
+ psb_s_oacc_ell_scal.o psb_s_oacc_ell_allocate_mnnz.o \
+ psb_s_oacc_ell_reallocate_nz.o psb_s_oacc_ell_cp_from_coo.o \
+ psb_s_oacc_ell_cp_from_fmt.o psb_s_oacc_ell_mv_from_coo.o \
+ psb_s_oacc_ell_mv_from_fmt.o psb_s_oacc_ell_mold.o: $(UP)/psb_s_oacc_ell_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
+
+psb_s_oacc_hll_vect_mv.o psb_s_oacc_hll_inner_vect_sv.o \
+ psb_s_oacc_hll_scals.o \
+ psb_s_oacc_hll_scal.o psb_s_oacc_hll_allocate_mnnz.o \
+ psb_s_oacc_hll_reallocate_nz.o psb_s_oacc_hll_cp_from_coo.o \
+ psb_s_oacc_hll_cp_from_fmt.o psb_s_oacc_hll_mv_from_coo.o \
+ psb_s_oacc_hll_mv_from_fmt.o psb_s_oacc_hll_mold.o: $(UP)/psb_s_oacc_hll_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
+
+
+psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
+ psb_d_oacc_csr_scals.o \
psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
- psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o
+ psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
+
+psb_d_oacc_ell_vect_mv.o psb_d_oacc_ell_inner_vect_sv.o \
+ psb_d_oacc_ell_scals.o \
+ psb_d_oacc_ell_scal.o psb_d_oacc_ell_allocate_mnnz.o \
+ psb_d_oacc_ell_reallocate_nz.o psb_d_oacc_ell_cp_from_coo.o \
+ psb_d_oacc_ell_cp_from_fmt.o psb_d_oacc_ell_mv_from_coo.o \
+ psb_d_oacc_ell_mv_from_fmt.o psb_d_oacc_ell_mold.o: $(UP)/psb_d_oacc_ell_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
+
+psb_d_oacc_hll_vect_mv.o psb_d_oacc_hll_inner_vect_sv.o \
+ psb_d_oacc_hll_scals.o \
+ psb_d_oacc_hll_scal.o psb_d_oacc_hll_allocate_mnnz.o \
+ psb_d_oacc_hll_reallocate_nz.o psb_d_oacc_hll_cp_from_coo.o \
+ psb_d_oacc_hll_cp_from_fmt.o psb_d_oacc_hll_mv_from_coo.o \
+ psb_d_oacc_hll_mv_from_fmt.o psb_d_oacc_hll_mold.o: $(UP)/psb_d_oacc_hll_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
+
+psb_c_oacc_csr_vect_mv.o psb_c_oacc_csr_inner_vect_sv.o \
+ psb_c_oacc_csr_scals.o \
+ psb_c_oacc_csr_scal.o psb_c_oacc_csr_allocate_mnnz.o \
+ psb_c_oacc_csr_reallocate_nz.o psb_c_oacc_csr_cp_from_coo.o \
+ psb_c_oacc_csr_cp_from_fmt.o psb_c_oacc_csr_mv_from_coo.o \
+ psb_c_oacc_csr_mv_from_fmt.o psb_c_oacc_csr_mold.o: $(UP)/psb_c_oacc_csr_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
+
+psb_c_oacc_ell_vect_mv.o psb_c_oacc_ell_inner_vect_sv.o \
+ psb_c_oacc_ell_scals.o \
+ psb_c_oacc_ell_scal.o psb_c_oacc_ell_allocate_mnnz.o \
+ psb_c_oacc_ell_reallocate_nz.o psb_c_oacc_ell_cp_from_coo.o \
+ psb_c_oacc_ell_cp_from_fmt.o psb_c_oacc_ell_mv_from_coo.o \
+ psb_c_oacc_ell_mv_from_fmt.o psb_c_oacc_ell_mold.o: $(UP)/psb_c_oacc_ell_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
+
+psb_c_oacc_hll_vect_mv.o psb_c_oacc_hll_inner_vect_sv.o \
+ psb_c_oacc_hll_scals.o \
+ psb_c_oacc_hll_scal.o psb_c_oacc_hll_allocate_mnnz.o \
+ psb_c_oacc_hll_reallocate_nz.o psb_c_oacc_hll_cp_from_coo.o \
+ psb_c_oacc_hll_cp_from_fmt.o psb_c_oacc_hll_mv_from_coo.o \
+ psb_c_oacc_hll_mv_from_fmt.o psb_c_oacc_hll_mold.o: $(UP)/psb_c_oacc_hll_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
+
+
+psb_z_oacc_csr_vect_mv.o psb_z_oacc_csr_inner_vect_sv.o \
+ psb_z_oacc_csr_scals.o \
+ psb_z_oacc_csr_scal.o psb_z_oacc_csr_allocate_mnnz.o \
+ psb_z_oacc_csr_reallocate_nz.o psb_z_oacc_csr_cp_from_coo.o \
+ psb_z_oacc_csr_cp_from_fmt.o psb_z_oacc_csr_mv_from_coo.o \
+ psb_z_oacc_csr_mv_from_fmt.o psb_z_oacc_csr_mold.o: $(UP)/psb_z_oacc_csr_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
+
+psb_z_oacc_ell_vect_mv.o psb_z_oacc_ell_inner_vect_sv.o \
+ psb_z_oacc_ell_scals.o \
+ psb_z_oacc_ell_scal.o psb_z_oacc_ell_allocate_mnnz.o \
+ psb_z_oacc_ell_reallocate_nz.o psb_z_oacc_ell_cp_from_coo.o \
+ psb_z_oacc_ell_cp_from_fmt.o psb_z_oacc_ell_mv_from_coo.o \
+ psb_z_oacc_ell_mv_from_fmt.o psb_z_oacc_ell_mold.o: $(UP)/psb_z_oacc_ell_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
+
+psb_z_oacc_hll_vect_mv.o psb_z_oacc_hll_inner_vect_sv.o \
+ psb_z_oacc_hll_scals.o \
+ psb_z_oacc_hll_scal.o psb_z_oacc_hll_allocate_mnnz.o \
+ psb_z_oacc_hll_reallocate_nz.o psb_z_oacc_hll_cp_from_coo.o \
+ psb_z_oacc_hll_cp_from_fmt.o psb_z_oacc_hll_mv_from_coo.o \
+ psb_z_oacc_hll_mv_from_fmt.o psb_z_oacc_hll_mold.o: $(UP)/psb_z_oacc_hll_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
-#psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
+psb_s_oacc_mlt_v_2.o psb_s_oacc_mlt_v.o: $(UP)/psb_s_oacc_vect_mod.o
+psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
+psb_c_oacc_mlt_v_2.o psb_c_oacc_mlt_v.o: $(UP)/psb_c_oacc_vect_mod.o
+psb_z_oacc_mlt_v_2.o psb_z_oacc_mlt_v.o: $(UP)/psb_z_oacc_vect_mod.o
clean:
diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
index db56d9fc..3c6f6494 100644
--- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -21,14 +23,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_c_vect_oacc)
+ select type (yy => y)
+ class is (psb_c_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_c_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
index f3e78d98..8113297b 100644
--- a/openacc/impl/psb_c_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt, nc
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -18,19 +20,40 @@ contains
nzt = a%nzt
nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
-
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_c_vect_oacc)
+ select type (yy => y)
+ class is (psb_c_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_c_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
+
contains
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
index 68141e42..551b1a29 100644
--- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -23,14 +25,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_c_vect_oacc)
+ select type (yy => y)
+ class is (psb_c_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_c_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
diff --git a/openacc/impl/psb_c_oacc_mlt_v_2.f90 b/openacc/impl/psb_c_oacc_mlt_v_2.f90
index b47cd1ae..ed0fc88e 100644
--- a/openacc/impl/psb_c_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_c_oacc_mlt_v_2.f90
@@ -9,10 +9,11 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
+ logical :: conjgx_, conjgy_, device_done
conjgx_ = .false.
conjgy_ = .false.
+ device_done = .false.
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
@@ -27,31 +28,10 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if ((beta /= czero) .and. (z%is_host())) call z%sync()
call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (yy%is_dev()) call yy%sync()
- if ((beta /= czero) .and. (z%is_dev())) call z%sync()
- !call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- end if
- call z%set_host()
+ device_done = .true.
end select
- class default
+ end select
+ if (.not.device_done) then
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= czero) .and. (z%is_dev())) call z%sync()
@@ -73,13 +53,14 @@ subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
end do
end if
call z%set_host()
- end select
+ end if
+
contains
subroutine c_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
implicit none
integer(psb_ipk_), intent(in) :: n
-complex(psb_spk_), intent(in) :: alpha, beta
-complex(psb_spk_), intent(inout) :: x(:), y(:), z(:)
+ complex(psb_spk_), intent(in) :: alpha, beta
+ complex(psb_spk_), intent(inout) :: x(:), y(:), z(:)
integer(psb_ipk_), intent(out) :: info
logical, intent(in) :: conjgx, conjgy
diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
index 0001cc76..596f2b17 100644
--- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -21,14 +23,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_d_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
index 4239d049..ddd4bfc8 100644
--- a/openacc/impl/psb_d_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt, nc
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -18,19 +20,40 @@ contains
nzt = a%nzt
nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
-
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_d_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
+
contains
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
index e7c47b7a..f971d61a 100644
--- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -23,14 +25,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_d_vect_oacc)
+ select type (yy => y)
+ class is (psb_d_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_d_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90
index ce460924..7850329a 100644
--- a/openacc/impl/psb_d_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90
@@ -9,10 +9,11 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
+ logical :: conjgx_, conjgy_, device_done
conjgx_ = .false.
conjgy_ = .false.
+ device_done = .false.
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
@@ -27,31 +28,10 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if ((beta /= dzero) .and. (z%is_host())) call z%sync()
call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (yy%is_dev()) call yy%sync()
- if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
- !call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- end if
- call z%set_host()
+ device_done = .true.
end select
- class default
+ end select
+ if (.not.device_done) then
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
@@ -73,13 +53,14 @@ subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
end do
end if
call z%set_host()
- end select
+ end if
+
contains
subroutine d_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
implicit none
integer(psb_ipk_), intent(in) :: n
-real(psb_dpk_), intent(in) :: alpha, beta
-real(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
+ real(psb_dpk_), intent(in) :: alpha, beta
+ real(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
integer(psb_ipk_), intent(out) :: info
logical, intent(in) :: conjgx, conjgy
diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
index 13ee1651..2799bd05 100644
--- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -21,14 +23,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_s_vect_oacc)
+ select type (yy => y)
+ class is (psb_s_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_s_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
index bbbdd6a3..81166643 100644
--- a/openacc/impl/psb_s_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt, nc
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -18,19 +20,40 @@ contains
nzt = a%nzt
nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
-
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_s_vect_oacc)
+ select type (yy => y)
+ class is (psb_s_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_s_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
+
contains
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
index efe9a9ca..e289f07c 100644
--- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -23,14 +25,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_s_vect_oacc)
+ select type (yy => y)
+ class is (psb_s_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_s_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
diff --git a/openacc/impl/psb_s_oacc_mlt_v_2.f90 b/openacc/impl/psb_s_oacc_mlt_v_2.f90
index 2ce7fe53..b97f2f08 100644
--- a/openacc/impl/psb_s_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_s_oacc_mlt_v_2.f90
@@ -9,10 +9,11 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
+ logical :: conjgx_, conjgy_, device_done
conjgx_ = .false.
conjgy_ = .false.
+ device_done = .false.
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
@@ -27,31 +28,10 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if ((beta /= szero) .and. (z%is_host())) call z%sync()
call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (yy%is_dev()) call yy%sync()
- if ((beta /= szero) .and. (z%is_dev())) call z%sync()
- !call s_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- end if
- call z%set_host()
+ device_done = .true.
end select
- class default
+ end select
+ if (.not.device_done) then
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= szero) .and. (z%is_dev())) call z%sync()
@@ -73,13 +53,14 @@ subroutine psb_s_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
end do
end if
call z%set_host()
- end select
+ end if
+
contains
subroutine s_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
implicit none
integer(psb_ipk_), intent(in) :: n
-real(psb_spk_), intent(in) :: alpha, beta
-real(psb_spk_), intent(inout) :: x(:), y(:), z(:)
+ real(psb_spk_), intent(in) :: alpha, beta
+ real(psb_spk_), intent(inout) :: x(:), y(:), z(:)
integer(psb_ipk_), intent(out) :: info
logical, intent(in) :: conjgx, conjgy
diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
index cb34dce1..75cc693b 100644
--- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -21,14 +23,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_z_vect_oacc)
+ select type (yy => y)
+ class is (psb_z_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_z_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
index ad8460b8..8d442c1d 100644
--- a/openacc/impl/psb_z_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt, nc
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -18,19 +20,40 @@ contains
nzt = a%nzt
nc = size(a%ja,2)
if ((n /= size(x%v)) .or. (m /= size(y%v))) then
- write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
+ write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
-
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_z_vect_oacc)
+ select type (yy => y)
+ class is (psb_z_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_z_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
+
contains
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
index dbadf034..e373d6ff 100644
--- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
@@ -11,6 +11,8 @@ contains
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
+ character :: trans_
+ logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
@@ -23,14 +25,35 @@ contains
info = psb_err_invalid_mat_state_
return
end if
+ device_done = .false.
+ if (present(trans)) then
+ trans_ = trans
+ else
+ trans_ = 'N'
+ end if
+ tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
- if (a%is_host()) call a%sync()
- if (x%is_host()) call x%sync()
- if (y%is_host()) call y%sync()
-
- call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
- call y%set_dev()
-
+ if (.not.tra) then
+ select type(xx => x)
+ class is (psb_z_vect_oacc)
+ select type (yy => y)
+ class is (psb_z_vect_oacc)
+ if (a%is_host()) call a%sync()
+ if (xx%is_host()) call xx%sync()
+ if (yy%is_host()) call yy%sync()
+ call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
+ call y%set_dev()
+ device_done = .true.
+ end select
+ end select
+ end if
+
+ if (.not.device_done) then
+ if (x%is_dev()) call x%sync()
+ if (y%is_dev()) call y%sync()
+ call a%psb_z_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
+ call y%set_host()
+ end if
contains
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
diff --git a/openacc/impl/psb_z_oacc_mlt_v_2.f90 b/openacc/impl/psb_z_oacc_mlt_v_2.f90
index f69d863c..c13b348c 100644
--- a/openacc/impl/psb_z_oacc_mlt_v_2.f90
+++ b/openacc/impl/psb_z_oacc_mlt_v_2.f90
@@ -9,10 +9,11 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
integer(psb_ipk_) :: i, n
- logical :: conjgx_, conjgy_
+ logical :: conjgx_, conjgy_, device_done
conjgx_ = .false.
conjgy_ = .false.
+ device_done = .false.
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
@@ -27,31 +28,10 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
if ((beta /= zzero) .and. (z%is_host())) call z%sync()
call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
call z%set_dev()
- class default
- if (xx%is_dev()) call xx%sync()
- if (yy%is_dev()) call yy%sync()
- if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
- !call z_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
- if (conjgx_.and.conjgy_) then
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else if (conjgx_.and.(.not.conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * conjg(xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- else if ((.not.conjgx_).and.(conjgy_)) then
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * conjg(yy%v(i)) + beta * z%v(i)
- end do
- else
- do i = 1, n
- z%v(i) = alpha * (xx%v(i)) * (yy%v(i)) + beta * z%v(i)
- end do
- end if
- call z%set_host()
+ device_done = .true.
end select
- class default
+ end select
+ if (.not.device_done) then
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= zzero) .and. (z%is_dev())) call z%sync()
@@ -73,13 +53,14 @@ subroutine psb_z_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
end do
end if
call z%set_host()
- end select
+ end if
+
contains
subroutine z_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
implicit none
integer(psb_ipk_), intent(in) :: n
-complex(psb_dpk_), intent(in) :: alpha, beta
-complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
+ complex(psb_dpk_), intent(in) :: alpha, beta
+ complex(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
integer(psb_ipk_), intent(out) :: info
logical, intent(in) :: conjgx, conjgy
diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90
index a1f6e2c3..59794335 100644
--- a/openacc/psb_c_oacc_csr_mat_mod.F90
+++ b/openacc/psb_c_oacc_csr_mat_mod.F90
@@ -4,7 +4,6 @@ module psb_c_oacc_csr_mat_mod
use openacc
use psb_c_mat_mod
use psb_c_oacc_vect_mod
- !use oaccsparse_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
@@ -17,8 +16,6 @@ module psb_c_oacc_csr_mat_mod
procedure, pass(a) :: sizeof => c_oacc_csr_sizeof
procedure, pass(a) :: vect_mv => psb_c_oacc_csr_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_c_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_c_oacc_csr_csmv
procedure, pass(a) :: scals => psb_c_oacc_csr_scals
procedure, pass(a) :: scalv => psb_c_oacc_csr_scal
procedure, pass(a) :: reallocate_nz => psb_c_oacc_csr_reallocate_nz
@@ -93,26 +90,6 @@ module psb_c_oacc_csr_mat_mod
end subroutine psb_c_oacc_csr_inner_vect_sv
end interface
- interface
- module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
- complex(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_c_oacc_csr_csmm
- end interface
-
- interface
- module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta, x(:)
- complex(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_c_oacc_csr_csmv
- end interface
-
interface
module subroutine psb_c_oacc_csr_scals(d, a, info)
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
index 996d4628..f0559d26 100644
--- a/openacc/psb_c_oacc_ell_mat_mod.F90
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_c_oacc_ell_mat_mod
procedure, pass(a) :: free => c_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_c_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_c_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_c_oacc_ell_csmv
procedure, pass(a) :: scals => psb_c_oacc_ell_scals
procedure, pass(a) :: scalv => psb_c_oacc_ell_scal
procedure, pass(a) :: reallocate_nz => psb_c_oacc_ell_reallocate_nz
@@ -92,26 +90,6 @@ module psb_c_oacc_ell_mat_mod
end subroutine psb_c_oacc_ell_inner_vect_sv
end interface
- interface
- module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
- complex(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_c_oacc_ell_csmm
- end interface
-
- interface
- module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta, x(:)
- complex(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_c_oacc_ell_csmv
- end interface
-
interface
module subroutine psb_c_oacc_ell_scals(d, a, info)
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90
index ebcbf29e..93436224 100644
--- a/openacc/psb_c_oacc_hll_mat_mod.F90
+++ b/openacc/psb_c_oacc_hll_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_c_oacc_hll_mat_mod
procedure, pass(a) :: free => c_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_c_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_c_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_c_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_c_oacc_hll_csmv
procedure, pass(a) :: scals => psb_c_oacc_hll_scals
procedure, pass(a) :: scalv => psb_c_oacc_hll_scal
procedure, pass(a) :: reallocate_nz => psb_c_oacc_hll_reallocate_nz
@@ -92,26 +90,6 @@ module psb_c_oacc_hll_mat_mod
end subroutine psb_c_oacc_hll_inner_vect_sv
end interface
- interface
- module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta, x(:,:)
- complex(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_c_oacc_hll_csmm
- end interface
-
- interface
- module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta, x(:)
- complex(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_c_oacc_hll_csmv
- end interface
-
interface
module subroutine psb_c_oacc_hll_scals(d, a, info)
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index a9d72eae..08b75575 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -4,7 +4,6 @@ module psb_d_oacc_csr_mat_mod
use openacc
use psb_d_mat_mod
use psb_d_oacc_vect_mod
- !use oaccsparse_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
@@ -17,8 +16,6 @@ module psb_d_oacc_csr_mat_mod
procedure, pass(a) :: sizeof => d_oacc_csr_sizeof
procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv
procedure, pass(a) :: scals => psb_d_oacc_csr_scals
procedure, pass(a) :: scalv => psb_d_oacc_csr_scal
procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz
@@ -93,26 +90,6 @@ module psb_d_oacc_csr_mat_mod
end subroutine psb_d_oacc_csr_inner_vect_sv
end interface
- interface
- module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_csr_csmm
- end interface
-
- interface
- module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_csr_csmv
- end interface
-
interface
module subroutine psb_d_oacc_csr_scals(d, a, info)
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 1409a1d6..3e25f576 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_d_oacc_ell_mat_mod
procedure, pass(a) :: free => d_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv
procedure, pass(a) :: scals => psb_d_oacc_ell_scals
procedure, pass(a) :: scalv => psb_d_oacc_ell_scal
procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz
@@ -92,26 +90,6 @@ module psb_d_oacc_ell_mat_mod
end subroutine psb_d_oacc_ell_inner_vect_sv
end interface
- interface
- module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_ell_csmm
- end interface
-
- interface
- module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_ell_csmv
- end interface
-
interface
module subroutine psb_d_oacc_ell_scals(d, a, info)
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
index 15ae055e..084987e5 100644
--- a/openacc/psb_d_oacc_hll_mat_mod.F90
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_d_oacc_hll_mat_mod
procedure, pass(a) :: free => d_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv
procedure, pass(a) :: scals => psb_d_oacc_hll_scals
procedure, pass(a) :: scalv => psb_d_oacc_hll_scal
procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz
@@ -92,26 +90,6 @@ module psb_d_oacc_hll_mat_mod
end subroutine psb_d_oacc_hll_inner_vect_sv
end interface
- interface
- module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_hll_csmm
- end interface
-
- interface
- module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta, x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_d_oacc_hll_csmv
- end interface
-
interface
module subroutine psb_d_oacc_hll_scals(d, a, info)
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90
index 64a5f9a1..16668d25 100644
--- a/openacc/psb_s_oacc_csr_mat_mod.F90
+++ b/openacc/psb_s_oacc_csr_mat_mod.F90
@@ -4,7 +4,6 @@ module psb_s_oacc_csr_mat_mod
use openacc
use psb_s_mat_mod
use psb_s_oacc_vect_mod
- !use oaccsparse_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
@@ -17,8 +16,6 @@ module psb_s_oacc_csr_mat_mod
procedure, pass(a) :: sizeof => s_oacc_csr_sizeof
procedure, pass(a) :: vect_mv => psb_s_oacc_csr_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_s_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_s_oacc_csr_csmv
procedure, pass(a) :: scals => psb_s_oacc_csr_scals
procedure, pass(a) :: scalv => psb_s_oacc_csr_scal
procedure, pass(a) :: reallocate_nz => psb_s_oacc_csr_reallocate_nz
@@ -93,26 +90,6 @@ module psb_s_oacc_csr_mat_mod
end subroutine psb_s_oacc_csr_inner_vect_sv
end interface
- interface
- module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_s_oacc_csr_csmm
- end interface
-
- interface
- module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta, x(:)
- real(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_s_oacc_csr_csmv
- end interface
-
interface
module subroutine psb_s_oacc_csr_scals(d, a, info)
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
index 2893c3e5..dcfc1850 100644
--- a/openacc/psb_s_oacc_ell_mat_mod.F90
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_s_oacc_ell_mat_mod
procedure, pass(a) :: free => s_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_s_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_s_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_s_oacc_ell_csmv
procedure, pass(a) :: scals => psb_s_oacc_ell_scals
procedure, pass(a) :: scalv => psb_s_oacc_ell_scal
procedure, pass(a) :: reallocate_nz => psb_s_oacc_ell_reallocate_nz
@@ -92,26 +90,6 @@ module psb_s_oacc_ell_mat_mod
end subroutine psb_s_oacc_ell_inner_vect_sv
end interface
- interface
- module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_s_oacc_ell_csmm
- end interface
-
- interface
- module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta, x(:)
- real(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_s_oacc_ell_csmv
- end interface
-
interface
module subroutine psb_s_oacc_ell_scals(d, a, info)
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90
index 583a281e..0e7362f2 100644
--- a/openacc/psb_s_oacc_hll_mat_mod.F90
+++ b/openacc/psb_s_oacc_hll_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_s_oacc_hll_mat_mod
procedure, pass(a) :: free => s_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_s_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_s_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_s_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_s_oacc_hll_csmv
procedure, pass(a) :: scals => psb_s_oacc_hll_scals
procedure, pass(a) :: scalv => psb_s_oacc_hll_scal
procedure, pass(a) :: reallocate_nz => psb_s_oacc_hll_reallocate_nz
@@ -92,26 +90,6 @@ module psb_s_oacc_hll_mat_mod
end subroutine psb_s_oacc_hll_inner_vect_sv
end interface
- interface
- module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta, x(:,:)
- real(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_s_oacc_hll_csmm
- end interface
-
- interface
- module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta, x(:)
- real(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_s_oacc_hll_csmv
- end interface
-
interface
module subroutine psb_s_oacc_hll_scals(d, a, info)
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90
index 47973825..fdb59b4b 100644
--- a/openacc/psb_z_oacc_csr_mat_mod.F90
+++ b/openacc/psb_z_oacc_csr_mat_mod.F90
@@ -4,7 +4,6 @@ module psb_z_oacc_csr_mat_mod
use openacc
use psb_z_mat_mod
use psb_z_oacc_vect_mod
- !use oaccsparse_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
@@ -17,8 +16,6 @@ module psb_z_oacc_csr_mat_mod
procedure, pass(a) :: sizeof => z_oacc_csr_sizeof
procedure, pass(a) :: vect_mv => psb_z_oacc_csr_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_csr_inner_vect_sv
- procedure, pass(a) :: csmm => psb_z_oacc_csr_csmm
- procedure, pass(a) :: csmv => psb_z_oacc_csr_csmv
procedure, pass(a) :: scals => psb_z_oacc_csr_scals
procedure, pass(a) :: scalv => psb_z_oacc_csr_scal
procedure, pass(a) :: reallocate_nz => psb_z_oacc_csr_reallocate_nz
@@ -93,26 +90,6 @@ module psb_z_oacc_csr_mat_mod
end subroutine psb_z_oacc_csr_inner_vect_sv
end interface
- interface
- module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- complex(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_z_oacc_csr_csmm
- end interface
-
- interface
- module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
- complex(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_z_oacc_csr_csmv
- end interface
-
interface
module subroutine psb_z_oacc_csr_scals(d, a, info)
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
index 4013ee9d..9d09d43d 100644
--- a/openacc/psb_z_oacc_ell_mat_mod.F90
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_z_oacc_ell_mat_mod
procedure, pass(a) :: free => z_oacc_ell_free
procedure, pass(a) :: vect_mv => psb_z_oacc_ell_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_ell_inner_vect_sv
- procedure, pass(a) :: csmm => psb_z_oacc_ell_csmm
- procedure, pass(a) :: csmv => psb_z_oacc_ell_csmv
procedure, pass(a) :: scals => psb_z_oacc_ell_scals
procedure, pass(a) :: scalv => psb_z_oacc_ell_scal
procedure, pass(a) :: reallocate_nz => psb_z_oacc_ell_reallocate_nz
@@ -92,26 +90,6 @@ module psb_z_oacc_ell_mat_mod
end subroutine psb_z_oacc_ell_inner_vect_sv
end interface
- interface
- module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- complex(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_z_oacc_ell_csmm
- end interface
-
- interface
- module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
- complex(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_z_oacc_ell_csmv
- end interface
-
interface
module subroutine psb_z_oacc_ell_scals(d, a, info)
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90
index 289b78ac..9eb7d08c 100644
--- a/openacc/psb_z_oacc_hll_mat_mod.F90
+++ b/openacc/psb_z_oacc_hll_mat_mod.F90
@@ -26,8 +26,6 @@ module psb_z_oacc_hll_mat_mod
procedure, pass(a) :: free => z_oacc_hll_free
procedure, pass(a) :: vect_mv => psb_z_oacc_hll_vect_mv
procedure, pass(a) :: in_vect_sv => psb_z_oacc_hll_inner_vect_sv
- procedure, pass(a) :: csmm => psb_z_oacc_hll_csmm
- procedure, pass(a) :: csmv => psb_z_oacc_hll_csmv
procedure, pass(a) :: scals => psb_z_oacc_hll_scals
procedure, pass(a) :: scalv => psb_z_oacc_hll_scal
procedure, pass(a) :: reallocate_nz => psb_z_oacc_hll_reallocate_nz
@@ -92,26 +90,6 @@ module psb_z_oacc_hll_mat_mod
end subroutine psb_z_oacc_hll_inner_vect_sv
end interface
- interface
- module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta, x(:,:)
- complex(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_z_oacc_hll_csmm
- end interface
-
- interface
- module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta, x(:)
- complex(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
- end subroutine psb_z_oacc_hll_csmv
- end interface
-
interface
module subroutine psb_z_oacc_hll_scals(d, a, info)
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
From 6236f3489cde833eb81232bf97b4cf7005e21183 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 30 Aug 2024 16:05:08 +0200
Subject: [PATCH 37/86] Remove obsolete files
---
openacc/impl/psb_c_oacc_csr_csmm.F90 | 86 ----------------------------
openacc/impl/psb_c_oacc_csr_csmv.F90 | 81 --------------------------
openacc/impl/psb_c_oacc_ell_csmm.F90 | 86 ----------------------------
openacc/impl/psb_c_oacc_ell_csmv.F90 | 82 --------------------------
openacc/impl/psb_c_oacc_hll_csmm.F90 | 86 ----------------------------
openacc/impl/psb_c_oacc_hll_csmv.F90 | 84 ---------------------------
openacc/impl/psb_d_oacc_csr_csmm.F90 | 86 ----------------------------
openacc/impl/psb_d_oacc_csr_csmv.F90 | 81 --------------------------
openacc/impl/psb_d_oacc_ell_csmm.F90 | 86 ----------------------------
openacc/impl/psb_d_oacc_ell_csmv.F90 | 82 --------------------------
openacc/impl/psb_d_oacc_hll_csmm.F90 | 86 ----------------------------
openacc/impl/psb_d_oacc_hll_csmv.F90 | 84 ---------------------------
openacc/impl/psb_s_oacc_csr_csmm.F90 | 86 ----------------------------
openacc/impl/psb_s_oacc_csr_csmv.F90 | 81 --------------------------
openacc/impl/psb_s_oacc_ell_csmm.F90 | 86 ----------------------------
openacc/impl/psb_s_oacc_ell_csmv.F90 | 82 --------------------------
openacc/impl/psb_s_oacc_hll_csmm.F90 | 86 ----------------------------
openacc/impl/psb_s_oacc_hll_csmv.F90 | 84 ---------------------------
openacc/impl/psb_z_oacc_csr_csmm.F90 | 86 ----------------------------
openacc/impl/psb_z_oacc_csr_csmv.F90 | 81 --------------------------
openacc/impl/psb_z_oacc_ell_csmm.F90 | 86 ----------------------------
openacc/impl/psb_z_oacc_ell_csmv.F90 | 82 --------------------------
openacc/impl/psb_z_oacc_hll_csmm.F90 | 86 ----------------------------
openacc/impl/psb_z_oacc_hll_csmv.F90 | 84 ---------------------------
24 files changed, 2020 deletions(-)
delete mode 100644 openacc/impl/psb_c_oacc_csr_csmm.F90
delete mode 100644 openacc/impl/psb_c_oacc_csr_csmv.F90
delete mode 100644 openacc/impl/psb_c_oacc_ell_csmm.F90
delete mode 100644 openacc/impl/psb_c_oacc_ell_csmv.F90
delete mode 100644 openacc/impl/psb_c_oacc_hll_csmm.F90
delete mode 100644 openacc/impl/psb_c_oacc_hll_csmv.F90
delete mode 100644 openacc/impl/psb_d_oacc_csr_csmm.F90
delete mode 100644 openacc/impl/psb_d_oacc_csr_csmv.F90
delete mode 100644 openacc/impl/psb_d_oacc_ell_csmm.F90
delete mode 100644 openacc/impl/psb_d_oacc_ell_csmv.F90
delete mode 100644 openacc/impl/psb_d_oacc_hll_csmm.F90
delete mode 100644 openacc/impl/psb_d_oacc_hll_csmv.F90
delete mode 100644 openacc/impl/psb_s_oacc_csr_csmm.F90
delete mode 100644 openacc/impl/psb_s_oacc_csr_csmv.F90
delete mode 100644 openacc/impl/psb_s_oacc_ell_csmm.F90
delete mode 100644 openacc/impl/psb_s_oacc_ell_csmv.F90
delete mode 100644 openacc/impl/psb_s_oacc_hll_csmm.F90
delete mode 100644 openacc/impl/psb_s_oacc_hll_csmv.F90
delete mode 100644 openacc/impl/psb_z_oacc_csr_csmm.F90
delete mode 100644 openacc/impl/psb_z_oacc_csr_csmv.F90
delete mode 100644 openacc/impl/psb_z_oacc_ell_csmm.F90
delete mode 100644 openacc/impl/psb_z_oacc_ell_csmv.F90
delete mode 100644 openacc/impl/psb_z_oacc_hll_csmm.F90
delete mode 100644 openacc/impl/psb_z_oacc_hll_csmv.F90
diff --git a/openacc/impl/psb_c_oacc_csr_csmm.F90 b/openacc/impl/psb_c_oacc_csr_csmm.F90
deleted file mode 100644
index 5bf0bad4..00000000
--- a/openacc/impl/psb_c_oacc_csr_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_c_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta
- complex(psb_spk_), intent(in) :: x(:,:)
- complex(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n,k, nxy
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'c_oacc_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = a%irp(i), a%irp(i+1) - 1
- y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_c_oacc_csr_csmm
-end submodule psb_c_oacc_csr_csmm_impl
-
diff --git a/openacc/impl/psb_c_oacc_csr_csmv.F90 b/openacc/impl/psb_c_oacc_csr_csmv.F90
deleted file mode 100644
index e5d5f24e..00000000
--- a/openacc/impl/psb_c_oacc_csr_csmv.F90
+++ /dev/null
@@ -1,81 +0,0 @@
-submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_c_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta
- complex(psb_spk_), intent(in) :: x(:)
- complex(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'c_oacc_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_c_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, n
- do j = a%irp(i), a%irp(i+1) - 1
- y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_c_oacc_csr_csmv
-end submodule psb_c_oacc_csr_csmv_impl
-
diff --git a/openacc/impl/psb_c_oacc_ell_csmm.F90 b/openacc/impl/psb_c_oacc_ell_csmm.F90
deleted file mode 100644
index 01ed0d8b..00000000
--- a/openacc/impl/psb_c_oacc_ell_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_c_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta
- complex(psb_spk_), intent(in) :: x(:,:)
- complex(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'c_oacc_ell_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_c_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nzt = a%nzt
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = 1, nzt
- y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_c_oacc_ell_csmm
-end submodule psb_c_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_c_oacc_ell_csmv.F90 b/openacc/impl/psb_c_oacc_ell_csmv.F90
deleted file mode 100644
index dd5c1207..00000000
--- a/openacc/impl/psb_c_oacc_ell_csmv.F90
+++ /dev/null
@@ -1,82 +0,0 @@
-submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_c_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta
- complex(psb_spk_), intent(in) :: x(:)
- complex(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'c_oacc_ell_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_c_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nzt = a%nzt
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- do j = 1, nzt
- y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_c_oacc_ell_csmv
-end submodule psb_c_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_c_oacc_hll_csmm.F90 b/openacc/impl/psb_c_oacc_hll_csmm.F90
deleted file mode 100644
index 6b0fc637..00000000
--- a/openacc/impl/psb_c_oacc_hll_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_c_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta
- complex(psb_spk_), intent(in) :: x(:,:)
- complex(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'c_oacc_hll_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop present(a, x, y)
- do j = 1, nxy
- do k = 1, nhacks
- do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
- y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_c_oacc_hll_csmm
-end submodule psb_c_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_c_oacc_hll_csmv.F90 b/openacc/impl/psb_c_oacc_hll_csmv.F90
deleted file mode 100644
index f32e37b7..00000000
--- a/openacc/impl/psb_c_oacc_hll_csmv.F90
+++ /dev/null
@@ -1,84 +0,0 @@
-submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_c_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_spk_), intent(in) :: alpha, beta
- complex(psb_spk_), intent(in) :: x(:)
- complex(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'c_oacc_hll_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_c_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- hksz = a%hksz
- nhacks = (a%get_nrows() + hksz - 1) / hksz
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
- ! This loop nest cannot be run with collapse, since
- ! the inner loop extent varies.
- !$acc parallel loop present(a, x, y)
- do i = 1, nhacks
- do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
- y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_c_oacc_hll_csmv
-end submodule psb_c_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90
deleted file mode 100644
index 6ab87175..00000000
--- a/openacc/impl/psb_d_oacc_csr_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n,k, nxy
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'd_oacc_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = a%irp(i), a%irp(i+1) - 1
- y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_d_oacc_csr_csmm
-end submodule psb_d_oacc_csr_csmm_impl
-
diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90
deleted file mode 100644
index 205a7a59..00000000
--- a/openacc/impl/psb_d_oacc_csr_csmv.F90
+++ /dev/null
@@ -1,81 +0,0 @@
-submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'd_oacc_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, n
- do j = a%irp(i), a%irp(i+1) - 1
- y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_d_oacc_csr_csmv
-end submodule psb_d_oacc_csr_csmv_impl
-
diff --git a/openacc/impl/psb_d_oacc_ell_csmm.F90 b/openacc/impl/psb_d_oacc_ell_csmm.F90
deleted file mode 100644
index 6515a306..00000000
--- a/openacc/impl/psb_d_oacc_ell_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'd_oacc_ell_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nzt = a%nzt
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = 1, nzt
- y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_d_oacc_ell_csmm
-end submodule psb_d_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_d_oacc_ell_csmv.F90 b/openacc/impl/psb_d_oacc_ell_csmv.F90
deleted file mode 100644
index 8d5e2aaa..00000000
--- a/openacc/impl/psb_d_oacc_ell_csmv.F90
+++ /dev/null
@@ -1,82 +0,0 @@
-submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'd_oacc_ell_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nzt = a%nzt
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- do j = 1, nzt
- y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_d_oacc_ell_csmv
-end submodule psb_d_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_d_oacc_hll_csmm.F90 b/openacc/impl/psb_d_oacc_hll_csmm.F90
deleted file mode 100644
index 2dd6b53b..00000000
--- a/openacc/impl/psb_d_oacc_hll_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:,:)
- real(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'd_oacc_hll_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop present(a, x, y)
- do j = 1, nxy
- do k = 1, nhacks
- do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
- y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_d_oacc_hll_csmm
-end submodule psb_d_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90
deleted file mode 100644
index d38fca61..00000000
--- a/openacc/impl/psb_d_oacc_hll_csmv.F90
+++ /dev/null
@@ -1,84 +0,0 @@
-submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_dpk_), intent(in) :: alpha, beta
- real(psb_dpk_), intent(in) :: x(:)
- real(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'd_oacc_hll_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- hksz = a%hksz
- nhacks = (a%get_nrows() + hksz - 1) / hksz
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
- ! This loop nest cannot be run with collapse, since
- ! the inner loop extent varies.
- !$acc parallel loop present(a, x, y)
- do i = 1, nhacks
- do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
- y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_d_oacc_hll_csmv
-end submodule psb_d_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_s_oacc_csr_csmm.F90 b/openacc/impl/psb_s_oacc_csr_csmm.F90
deleted file mode 100644
index bb8283bf..00000000
--- a/openacc/impl/psb_s_oacc_csr_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_s_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta
- real(psb_spk_), intent(in) :: x(:,:)
- real(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n,k, nxy
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 's_oacc_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = a%irp(i), a%irp(i+1) - 1
- y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_s_oacc_csr_csmm
-end submodule psb_s_oacc_csr_csmm_impl
-
diff --git a/openacc/impl/psb_s_oacc_csr_csmv.F90 b/openacc/impl/psb_s_oacc_csr_csmv.F90
deleted file mode 100644
index c224dc0e..00000000
--- a/openacc/impl/psb_s_oacc_csr_csmv.F90
+++ /dev/null
@@ -1,81 +0,0 @@
-submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_s_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta
- real(psb_spk_), intent(in) :: x(:)
- real(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 's_oacc_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_s_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, n
- do j = a%irp(i), a%irp(i+1) - 1
- y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_s_oacc_csr_csmv
-end submodule psb_s_oacc_csr_csmv_impl
-
diff --git a/openacc/impl/psb_s_oacc_ell_csmm.F90 b/openacc/impl/psb_s_oacc_ell_csmm.F90
deleted file mode 100644
index 63219384..00000000
--- a/openacc/impl/psb_s_oacc_ell_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_s_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta
- real(psb_spk_), intent(in) :: x(:,:)
- real(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 's_oacc_ell_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nzt = a%nzt
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = 1, nzt
- y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_s_oacc_ell_csmm
-end submodule psb_s_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_s_oacc_ell_csmv.F90 b/openacc/impl/psb_s_oacc_ell_csmv.F90
deleted file mode 100644
index d4aaa9d4..00000000
--- a/openacc/impl/psb_s_oacc_ell_csmv.F90
+++ /dev/null
@@ -1,82 +0,0 @@
-submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_s_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta
- real(psb_spk_), intent(in) :: x(:)
- real(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 's_oacc_ell_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_s_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nzt = a%nzt
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- do j = 1, nzt
- y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_s_oacc_ell_csmv
-end submodule psb_s_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_s_oacc_hll_csmm.F90 b/openacc/impl/psb_s_oacc_hll_csmm.F90
deleted file mode 100644
index 803071eb..00000000
--- a/openacc/impl/psb_s_oacc_hll_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_s_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta
- real(psb_spk_), intent(in) :: x(:,:)
- real(psb_spk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 's_oacc_hll_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop present(a, x, y)
- do j = 1, nxy
- do k = 1, nhacks
- do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
- y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_s_oacc_hll_csmm
-end submodule psb_s_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_s_oacc_hll_csmv.F90 b/openacc/impl/psb_s_oacc_hll_csmv.F90
deleted file mode 100644
index b3c0cae8..00000000
--- a/openacc/impl/psb_s_oacc_hll_csmv.F90
+++ /dev/null
@@ -1,84 +0,0 @@
-submodule (psb_s_oacc_hll_mat_mod) psb_s_oacc_hll_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_s_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_s_oacc_hll_sparse_mat), intent(in) :: a
- real(psb_spk_), intent(in) :: alpha, beta
- real(psb_spk_), intent(in) :: x(:)
- real(psb_spk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 's_oacc_hll_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_s_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- hksz = a%hksz
- nhacks = (a%get_nrows() + hksz - 1) / hksz
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
- ! This loop nest cannot be run with collapse, since
- ! the inner loop extent varies.
- !$acc parallel loop present(a, x, y)
- do i = 1, nhacks
- do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
- y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_s_oacc_hll_csmv
-end submodule psb_s_oacc_hll_csmv_impl
diff --git a/openacc/impl/psb_z_oacc_csr_csmm.F90 b/openacc/impl/psb_z_oacc_csr_csmm.F90
deleted file mode 100644
index 97a38deb..00000000
--- a/openacc/impl/psb_z_oacc_csr_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_z_oacc_csr_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta
- complex(psb_dpk_), intent(in) :: x(:,:)
- complex(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n,k, nxy
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'z_oacc_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = a%irp(i), a%irp(i+1) - 1
- y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_z_oacc_csr_csmm
-end submodule psb_z_oacc_csr_csmm_impl
-
diff --git a/openacc/impl/psb_z_oacc_csr_csmv.F90 b/openacc/impl/psb_z_oacc_csr_csmv.F90
deleted file mode 100644
index 8def3c76..00000000
--- a/openacc/impl/psb_z_oacc_csr_csmv.F90
+++ /dev/null
@@ -1,81 +0,0 @@
-submodule (psb_z_oacc_csr_mat_mod) psb_z_oacc_csr_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_z_oacc_csr_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_z_oacc_csr_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta
- complex(psb_dpk_), intent(in) :: x(:)
- complex(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'z_oacc_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_z_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, n
- do j = a%irp(i), a%irp(i+1) - 1
- y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i)
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_z_oacc_csr_csmv
-end submodule psb_z_oacc_csr_csmv_impl
-
diff --git a/openacc/impl/psb_z_oacc_ell_csmm.F90 b/openacc/impl/psb_z_oacc_ell_csmm.F90
deleted file mode 100644
index 406ca8c5..00000000
--- a/openacc/impl/psb_z_oacc_ell_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_z_oacc_ell_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta
- complex(psb_dpk_), intent(in) :: x(:,:)
- complex(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'z_oacc_ell_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nzt = a%nzt
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, n
- do k = 1, nzt
- y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_z_oacc_ell_csmm
-end submodule psb_z_oacc_ell_csmm_impl
diff --git a/openacc/impl/psb_z_oacc_ell_csmv.F90 b/openacc/impl/psb_z_oacc_ell_csmv.F90
deleted file mode 100644
index 502dd4f1..00000000
--- a/openacc/impl/psb_z_oacc_ell_csmv.F90
+++ /dev/null
@@ -1,82 +0,0 @@
-submodule (psb_z_oacc_ell_mat_mod) psb_z_oacc_ell_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_z_oacc_ell_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_z_oacc_ell_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta
- complex(psb_dpk_), intent(in) :: x(:)
- complex(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, nzt
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'z_oacc_ell_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_z_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nzt = a%nzt
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- do j = 1, nzt
- y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_z_oacc_ell_csmv
-end submodule psb_z_oacc_ell_csmv_impl
diff --git a/openacc/impl/psb_z_oacc_hll_csmm.F90 b/openacc/impl/psb_z_oacc_hll_csmm.F90
deleted file mode 100644
index 3cfe5b32..00000000
--- a/openacc/impl/psb_z_oacc_hll_csmm.F90
+++ /dev/null
@@ -1,86 +0,0 @@
-submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmm_impl
- use psb_base_mod
-contains
- module subroutine psb_z_oacc_hll_csmm(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta
- complex(psb_dpk_), intent(in) :: x(:,:)
- complex(psb_dpk_), intent(inout) :: y(:,:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'z_oacc_hll_csmm'
- logical, parameter :: debug = .false.
-
- info = psb_success_
- call psb_erractionsave(err_act)
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- nxy = min(size(x,2), size(y,2))
- nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz
-
- !$acc parallel loop collapse(2) present(a, x, y)
- do j = 1, nxy
- do i = 1, m
- y(i,j) = beta * y(i,j)
- end do
- end do
-
- !$acc parallel loop present(a, x, y)
- do j = 1, nxy
- do k = 1, nhacks
- do i = a%hkoffs(k), a%hkoffs(k + 1) - 1
- y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j)
- end do
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_z_oacc_hll_csmm
-end submodule psb_z_oacc_hll_csmm_impl
diff --git a/openacc/impl/psb_z_oacc_hll_csmv.F90 b/openacc/impl/psb_z_oacc_hll_csmv.F90
deleted file mode 100644
index 923bc061..00000000
--- a/openacc/impl/psb_z_oacc_hll_csmv.F90
+++ /dev/null
@@ -1,84 +0,0 @@
-submodule (psb_z_oacc_hll_mat_mod) psb_z_oacc_hll_csmv_impl
- use psb_base_mod
-contains
- module subroutine psb_z_oacc_hll_csmv(alpha, a, x, beta, y, info, trans)
- implicit none
- class(psb_z_oacc_hll_sparse_mat), intent(in) :: a
- complex(psb_dpk_), intent(in) :: alpha, beta
- complex(psb_dpk_), intent(in) :: x(:)
- complex(psb_dpk_), intent(inout) :: y(:)
- integer(psb_ipk_), intent(out) :: info
- character, optional, intent(in) :: trans
-
- character :: trans_
- integer(psb_ipk_) :: i, j, m, n, hksz, nhacks
- logical :: tra
- integer(psb_ipk_) :: err_act
- character(len=20) :: name = 'z_oacc_hll_csmv'
- logical, parameter :: debug = .false.
-
- call psb_erractionsave(err_act)
- info = psb_success_
-
- if (present(trans)) then
- trans_ = trans
- else
- trans_ = 'N'
- end if
-
- if (.not.a%is_asb()) then
- info = psb_err_invalid_mat_state_
- call psb_errpush(info, name)
- goto 9999
- endif
-
- tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
-
- if (tra) then
- m = a%get_ncols()
- n = a%get_nrows()
- else
- n = a%get_ncols()
- m = a%get_nrows()
- end if
-
- if (size(x,1) < n) then
- info = 36
- call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/))
- goto 9999
- end if
-
- if (size(y,1) < m) then
- info = 36
- call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/))
- goto 9999
- end if
-
- if (tra) then
- call a%psb_z_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans)
- else
- hksz = a%hksz
- nhacks = (a%get_nrows() + hksz - 1) / hksz
-
- !$acc parallel loop present(a, x, y)
- do i = 1, m
- y(i) = beta * y(i)
- end do
- ! This loop nest cannot be run with collapse, since
- ! the inner loop extent varies.
- !$acc parallel loop present(a, x, y)
- do i = 1, nhacks
- do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
- y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j))
- end do
- end do
- endif
-
- call psb_erractionrestore(err_act)
- return
-
-9999 call psb_error_handler(err_act)
- return
-
- end subroutine psb_z_oacc_hll_csmv
-end submodule psb_z_oacc_hll_csmv_impl
From bcbe0c89c758086725433b91bdaed38a7868db32 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 10 Sep 2024 17:37:35 +0200
Subject: [PATCH 38/86] Backporting fixes from version 4
---
openacc/Makefile | 7 +-
openacc/impl/psb_c_oacc_csr_vect_mv.F90 | 2 +-
openacc/impl/psb_c_oacc_ell_vect_mv.F90 | 2 +-
openacc/impl/psb_c_oacc_hll_vect_mv.F90 | 2 +-
openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 2 +-
openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 2 +-
openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 2 +-
openacc/impl/psb_s_oacc_csr_vect_mv.F90 | 2 +-
openacc/impl/psb_s_oacc_ell_vect_mv.F90 | 2 +-
openacc/impl/psb_s_oacc_hll_vect_mv.F90 | 2 +-
openacc/impl/psb_z_oacc_csr_vect_mv.F90 | 2 +-
openacc/impl/psb_z_oacc_ell_vect_mv.F90 | 2 +-
openacc/impl/psb_z_oacc_hll_vect_mv.F90 | 2 +-
openacc/psb_c_oacc_vect_mod.F90 | 231 ++++++++++++++++++------
openacc/psb_d_oacc_vect_mod.F90 | 231 ++++++++++++++++++------
openacc/psb_i_oacc_vect_mod.F90 | 213 ++++++++++++++++------
openacc/psb_l_oacc_vect_mod.F90 | 213 ++++++++++++++++------
openacc/psb_oacc_env_mod.F90 | 33 ++--
openacc/psb_s_oacc_vect_mod.F90 | 231 ++++++++++++++++++------
openacc/psb_z_oacc_vect_mod.F90 | 231 ++++++++++++++++++------
20 files changed, 1048 insertions(+), 366 deletions(-)
diff --git a/openacc/Makefile b/openacc/Makefile
index cdcc9f71..f60a810d 100644
--- a/openacc/Makefile
+++ b/openacc/Makefile
@@ -58,9 +58,10 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_env_mod.o
-psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o\
- psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o
-psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o
+psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
+ psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_oacc_env_mod.o
+psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_oacc_env_mod.o
+psb_i_oacc_vect_mod.o: psb_oacc_env_mod.o
psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o
diff --git a/openacc/impl/psb_c_oacc_csr_vect_mv.F90 b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
index 3c6f6494..c1030094 100644
--- a/openacc/impl/psb_c_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_c_oacc_ell_vect_mv.F90 b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
index 8113297b..7a39c031 100644
--- a/openacc/impl/psb_c_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_ell_vect_mv.F90
@@ -19,7 +19,7 @@ contains
n = a%get_ncols()
nzt = a%nzt
nc = size(a%ja,2)
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_c_oacc_hll_vect_mv.F90 b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
index 551b1a29..494ed149 100644
--- a/openacc/impl/psb_c_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_c_oacc_hll_vect_mv.F90
@@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
index 596f2b17..a2efdc3e 100644
--- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
index ddd4bfc8..b233669d 100644
--- a/openacc/impl/psb_d_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90
@@ -19,7 +19,7 @@ contains
n = a%get_ncols()
nzt = a%nzt
nc = size(a%ja,2)
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
index f971d61a..150ade8e 100644
--- a/openacc/impl/psb_d_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90
@@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_s_oacc_csr_vect_mv.F90 b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
index 2799bd05..5d3cc30c 100644
--- a/openacc/impl/psb_s_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_s_oacc_ell_vect_mv.F90 b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
index 81166643..76b1fe5b 100644
--- a/openacc/impl/psb_s_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_ell_vect_mv.F90
@@ -19,7 +19,7 @@ contains
n = a%get_ncols()
nzt = a%nzt
nc = size(a%ja,2)
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_s_oacc_hll_vect_mv.F90 b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
index e289f07c..e1d42252 100644
--- a/openacc/impl/psb_s_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_s_oacc_hll_vect_mv.F90
@@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_z_oacc_csr_vect_mv.F90 b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
index 75cc693b..b312b6b7 100644
--- a/openacc/impl/psb_z_oacc_csr_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_csr_vect_mv.F90
@@ -18,7 +18,7 @@ contains
m = a%get_nrows()
n = a%get_ncols()
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_z_oacc_ell_vect_mv.F90 b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
index 8d442c1d..53283689 100644
--- a/openacc/impl/psb_z_oacc_ell_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_ell_vect_mv.F90
@@ -19,7 +19,7 @@ contains
n = a%get_ncols()
nzt = a%nzt
nc = size(a%ja,2)
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/impl/psb_z_oacc_hll_vect_mv.F90 b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
index e373d6ff..350592bc 100644
--- a/openacc/impl/psb_z_oacc_hll_vect_mv.F90
+++ b/openacc/impl/psb_z_oacc_hll_vect_mv.F90
@@ -20,7 +20,7 @@ contains
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
- if ((n /= size(x%v)) .or. (m /= size(y%v))) then
+ if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 95c45646..4e2cca9e 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -3,6 +3,8 @@ module psb_c_oacc_vect_mod
use openacc
use psb_const_mod
use psb_error_mod
+ use psb_realloc_mod
+ use psb_oacc_env_mod
use psb_c_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
@@ -26,6 +28,8 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: bld_x => c_oacc_bld_x
procedure, pass(x) :: bld_mn => c_oacc_bld_mn
procedure, pass(x) :: free => c_oacc_vect_free
+ procedure, pass(x) :: free_buffer => c_oacc_vect_free_buffer
+ procedure, pass(x) :: maybe_free_buffer => c_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => c_oacc_ins_a
procedure, pass(x) :: ins_v => c_oacc_ins_v
procedure, pass(x) :: is_host => c_oacc_is_host
@@ -36,11 +40,13 @@ module psb_c_oacc_vect_mod
procedure, pass(x) :: set_sync => c_oacc_set_sync
procedure, pass(x) :: set_scal => c_oacc_set_scal
+ procedure, pass(x) :: new_buffer => c_oacc_new_buffer
procedure, pass(x) :: gthzv_x => c_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => c_oacc_gthzbuf
+ procedure, pass(x) :: gthzbuf => c_oacc_gthzbuf
procedure, pass(y) :: sctb => c_oacc_sctb
procedure, pass(y) :: sctb_x => c_oacc_sctb_x
procedure, pass(y) :: sctb_buf => c_oacc_sctb_buf
+ procedure, nopass :: device_wait => c_oacc_device_wait
procedure, pass(x) :: get_size => c_oacc_get_size
@@ -87,6 +93,11 @@ module psb_c_oacc_vect_mod
contains
+ subroutine c_oacc_device_wait()
+ implicit none
+ call acc_wait_all()
+ end subroutine c_oacc_device_wait
+
subroutine c_oacc_absval1(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
@@ -181,13 +192,17 @@ contains
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
- end do
- sum = szero
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x(i)/mx)**2
end do
- res = mx*sqrt(sum)
+ if (mx == szero) then
+ res = mx
+ else
+ sum = szero
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end if
end function c_inner_oacc_nrm2
end function c_oacc_nrm2
@@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx
complex(psb_spk_) :: beta
class(psb_c_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
+ !$acc update device(y%combuf)
+ call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
+ call y%set_dev()
+ acc_done = .true.
+ end select
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do
+ end if
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_spk_) :: beta,x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
end do
- end select
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine c_oacc_sctb_buf
subroutine c_oacc_sctb_x(i, n, idx, x, beta, y)
@@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx
complex(psb_spk_) :: beta, x(:)
class(psb_c_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
+ integer(psb_ipk_) :: info, ni, k
+ logical :: acc_done
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'c_oacc_sctb_x')
- return
+ if (y%is_host()) call y%sync()
+ if (acc_is_present(x)) then
+ call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+ acc_done = .true.
+ call y%set_dev()
+ end if
end select
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
+ end do
+ call y%set_host()
+ end if
- if (y%is_host()) call y%sync()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_spk_) :: beta, x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
+ end do
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine c_oacc_sctb_x
subroutine c_oacc_sctb(n, idx, x, beta, y)
@@ -463,7 +510,6 @@ contains
if (n == 0) return
if (y%is_dev()) call y%sync()
- !$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
@@ -477,30 +523,48 @@ contains
integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx
class(psb_c_vect_oacc) :: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info,k
+ logical :: acc_done
info = 0
+ acc_done = .false.
+
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
- select type(ii => idx)
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'c_oacc_gthzbuf')
- return
+ if (x%is_host()) call x%sync()
+ call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
+ acc_done = .true.
end select
- if (x%is_host()) call x%sync()
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (x%is_dev()) call x%sync()
+ do k = 1, n
+ x%combuf(k+i-1) = x%v(idx%v(k+i-1))
+ end do
+ end if
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_spk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine c_oacc_gthzbuf
-
+
subroutine c_oacc_gthzv_x(i, n, idx, x, y)
use psb_base_mod
implicit none
@@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx
complex(psb_spk_) :: y(:)
class(psb_c_vect_oacc):: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
info = 0
-
- select type(ii => idx)
+ acc_done = .false.
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'c_oacc_gthzv_x')
- return
+ if (x%is_host()) call x%sync()
+ if (acc_is_present(y)) then
+ call inner_gth(n,x%v,y(i:),ii%v(i:))
+ acc_done=.true.
+ end if
end select
-
- if (x%is_host()) call x%sync()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
+ if (.not.acc_done) then
+ if (x%is_dev()) call x%sync()
+ if (idx%is_dev()) call idx%sync()
+ do k = 1, n
+ y(k+i-1) = x%v(idx%v(k+i-1))
+ !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
+ end do
+ end if
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_spk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine c_oacc_gthzv_x
subroutine c_oacc_ins_v(n, irl, val, dupl, x, info)
@@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info
res = czero
- !write(0,*) 'dot_v'
+!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -762,6 +843,17 @@ contains
end function c_oacc_dot_a
+ subroutine c_oacc_new_buffer(n,x,info)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+ if (n /= psb_size(x%combuf)) then
+ call x%psb_c_base_vect_type%new_buffer(n,info)
+ !$acc enter data copyin(x%combuf)
+ end if
+ end subroutine c_oacc_new_buffer
+
subroutine c_oacc_sync_dev_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
@@ -860,12 +952,33 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
- if (allocated(x%v)) then
- if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- deallocate(x%v, stat=info)
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_c_base_vect_type%free(info)
end subroutine c_oacc_vect_free
+
+ subroutine c_oacc_vect_maybe_free_buffer(x,info)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (psb_oacc_get_maybe_free_buffer())&
+ & call x%free_buffer(info)
+ end subroutine c_oacc_vect_maybe_free_buffer
+
+ subroutine c_oacc_vect_free_buffer(x,info)
+ implicit none
+ class(psb_c_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_c_base_vect_type%free_buffer(info)
+
+ end subroutine c_oacc_vect_free_buffer
+
function c_oacc_get_size(x) result(res)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 3d71e54c..80ac35f7 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -3,6 +3,8 @@ module psb_d_oacc_vect_mod
use openacc
use psb_const_mod
use psb_error_mod
+ use psb_realloc_mod
+ use psb_oacc_env_mod
use psb_d_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
@@ -26,6 +28,8 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: bld_x => d_oacc_bld_x
procedure, pass(x) :: bld_mn => d_oacc_bld_mn
procedure, pass(x) :: free => d_oacc_vect_free
+ procedure, pass(x) :: free_buffer => d_oacc_vect_free_buffer
+ procedure, pass(x) :: maybe_free_buffer => d_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => d_oacc_ins_a
procedure, pass(x) :: ins_v => d_oacc_ins_v
procedure, pass(x) :: is_host => d_oacc_is_host
@@ -36,11 +40,13 @@ module psb_d_oacc_vect_mod
procedure, pass(x) :: set_sync => d_oacc_set_sync
procedure, pass(x) :: set_scal => d_oacc_set_scal
+ procedure, pass(x) :: new_buffer => d_oacc_new_buffer
procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf
+ procedure, pass(x) :: gthzbuf => d_oacc_gthzbuf
procedure, pass(y) :: sctb => d_oacc_sctb
procedure, pass(y) :: sctb_x => d_oacc_sctb_x
procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf
+ procedure, nopass :: device_wait => d_oacc_device_wait
procedure, pass(x) :: get_size => d_oacc_get_size
@@ -87,6 +93,11 @@ module psb_d_oacc_vect_mod
contains
+ subroutine d_oacc_device_wait()
+ implicit none
+ call acc_wait_all()
+ end subroutine d_oacc_device_wait
+
subroutine d_oacc_absval1(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -181,13 +192,17 @@ contains
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
- end do
- sum = dzero
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x(i)/mx)**2
end do
- res = mx*sqrt(sum)
+ if (mx == dzero) then
+ res = mx
+ else
+ sum = dzero
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end if
end function d_inner_oacc_nrm2
end function d_oacc_nrm2
@@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx
real(psb_dpk_) :: beta
class(psb_d_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
+ !$acc update device(y%combuf)
+ call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
+ call y%set_dev()
+ acc_done = .true.
+ end select
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do
+ end if
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_dpk_) :: beta,x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
end do
- end select
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine d_oacc_sctb_buf
subroutine d_oacc_sctb_x(i, n, idx, x, beta, y)
@@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx
real(psb_dpk_) :: beta, x(:)
class(psb_d_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
+ integer(psb_ipk_) :: info, ni, k
+ logical :: acc_done
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'd_oacc_sctb_x')
- return
+ if (y%is_host()) call y%sync()
+ if (acc_is_present(x)) then
+ call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+ acc_done = .true.
+ call y%set_dev()
+ end if
end select
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
+ end do
+ call y%set_host()
+ end if
- if (y%is_host()) call y%sync()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_dpk_) :: beta, x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
+ end do
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine d_oacc_sctb_x
subroutine d_oacc_sctb(n, idx, x, beta, y)
@@ -463,7 +510,6 @@ contains
if (n == 0) return
if (y%is_dev()) call y%sync()
- !$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
@@ -477,30 +523,48 @@ contains
integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx
class(psb_d_vect_oacc) :: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info,k
+ logical :: acc_done
info = 0
+ acc_done = .false.
+
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
- select type(ii => idx)
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'd_oacc_gthzbuf')
- return
+ if (x%is_host()) call x%sync()
+ call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
+ acc_done = .true.
end select
- if (x%is_host()) call x%sync()
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (x%is_dev()) call x%sync()
+ do k = 1, n
+ x%combuf(k+i-1) = x%v(idx%v(k+i-1))
+ end do
+ end if
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_dpk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine d_oacc_gthzbuf
-
+
subroutine d_oacc_gthzv_x(i, n, idx, x, y)
use psb_base_mod
implicit none
@@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx
real(psb_dpk_) :: y(:)
class(psb_d_vect_oacc):: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
info = 0
-
- select type(ii => idx)
+ acc_done = .false.
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'd_oacc_gthzv_x')
- return
+ if (x%is_host()) call x%sync()
+ if (acc_is_present(y)) then
+ call inner_gth(n,x%v,y(i:),ii%v(i:))
+ acc_done=.true.
+ end if
end select
-
- if (x%is_host()) call x%sync()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
+ if (.not.acc_done) then
+ if (x%is_dev()) call x%sync()
+ if (idx%is_dev()) call idx%sync()
+ do k = 1, n
+ y(k+i-1) = x%v(idx%v(k+i-1))
+ !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
+ end do
+ end if
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_dpk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine d_oacc_gthzv_x
subroutine d_oacc_ins_v(n, irl, val, dupl, x, info)
@@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info
res = dzero
- !write(0,*) 'dot_v'
+!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -762,6 +843,17 @@ contains
end function d_oacc_dot_a
+ subroutine d_oacc_new_buffer(n,x,info)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+ if (n /= psb_size(x%combuf)) then
+ call x%psb_d_base_vect_type%new_buffer(n,info)
+ !$acc enter data copyin(x%combuf)
+ end if
+ end subroutine d_oacc_new_buffer
+
subroutine d_oacc_sync_dev_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@@ -860,12 +952,33 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
- if (allocated(x%v)) then
- if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- deallocate(x%v, stat=info)
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_d_base_vect_type%free(info)
end subroutine d_oacc_vect_free
+
+ subroutine d_oacc_vect_maybe_free_buffer(x,info)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (psb_oacc_get_maybe_free_buffer())&
+ & call x%free_buffer(info)
+ end subroutine d_oacc_vect_maybe_free_buffer
+
+ subroutine d_oacc_vect_free_buffer(x,info)
+ implicit none
+ class(psb_d_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_d_base_vect_type%free_buffer(info)
+
+ end subroutine d_oacc_vect_free_buffer
+
function d_oacc_get_size(x) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 42226f0c..cfd0c210 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -3,6 +3,8 @@ module psb_i_oacc_vect_mod
use openacc
use psb_const_mod
use psb_error_mod
+ use psb_realloc_mod
+ use psb_oacc_env_mod
use psb_i_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
@@ -24,6 +26,8 @@ module psb_i_oacc_vect_mod
procedure, pass(x) :: bld_x => i_oacc_bld_x
procedure, pass(x) :: bld_mn => i_oacc_bld_mn
procedure, pass(x) :: free => i_oacc_vect_free
+ procedure, pass(x) :: free_buffer => i_oacc_vect_free_buffer
+ procedure, pass(x) :: maybe_free_buffer => i_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => i_oacc_ins_a
procedure, pass(x) :: ins_v => i_oacc_ins_v
procedure, pass(x) :: is_host => i_oacc_is_host
@@ -34,11 +38,13 @@ module psb_i_oacc_vect_mod
procedure, pass(x) :: set_sync => i_oacc_set_sync
procedure, pass(x) :: set_scal => i_oacc_set_scal
+ procedure, pass(x) :: new_buffer => i_oacc_new_buffer
procedure, pass(x) :: gthzv_x => i_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => i_oacc_gthzbuf
+ procedure, pass(x) :: gthzbuf => i_oacc_gthzbuf
procedure, pass(y) :: sctb => i_oacc_sctb
procedure, pass(y) :: sctb_x => i_oacc_sctb_x
procedure, pass(y) :: sctb_buf => i_oacc_sctb_buf
+ procedure, nopass :: device_wait => i_oacc_device_wait
procedure, pass(x) :: get_size => i_oacc_get_size
@@ -48,6 +54,11 @@ module psb_i_oacc_vect_mod
contains
+ subroutine i_oacc_device_wait()
+ implicit none
+ call acc_wait_all()
+ end subroutine i_oacc_device_wait
+
subroutine i_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
@@ -56,29 +67,44 @@ contains
class(psb_i_base_vect_type) :: idx
integer(psb_ipk_) :: beta
class(psb_i_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
+ !$acc update device(y%combuf)
+ call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
+ call y%set_dev()
+ acc_done = .true.
+ end select
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do
+ end if
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_ipk_) :: beta,x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
end do
- end select
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine i_oacc_sctb_buf
subroutine i_oacc_sctb_x(i, n, idx, x, beta, y)
@@ -88,24 +114,41 @@ contains
class(psb_i_base_vect_type) :: idx
integer(psb_ipk_) :: beta, x(:)
class(psb_i_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
+ integer(psb_ipk_) :: info, ni, k
+ logical :: acc_done
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'i_oacc_sctb_x')
- return
+ if (y%is_host()) call y%sync()
+ if (acc_is_present(x)) then
+ call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+ acc_done = .true.
+ call y%set_dev()
+ end if
end select
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
+ end do
+ call y%set_host()
+ end if
- if (y%is_host()) call y%sync()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_ipk_) :: beta, x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
+ end do
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine i_oacc_sctb_x
subroutine i_oacc_sctb(n, idx, x, beta, y)
@@ -121,7 +164,6 @@ contains
if (n == 0) return
if (y%is_dev()) call y%sync()
- !$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
@@ -135,30 +177,48 @@ contains
integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx
class(psb_i_vect_oacc) :: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info,k
+ logical :: acc_done
info = 0
+ acc_done = .false.
+
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
- select type(ii => idx)
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'i_oacc_gthzbuf')
- return
+ if (x%is_host()) call x%sync()
+ call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
+ acc_done = .true.
end select
- if (x%is_host()) call x%sync()
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (x%is_dev()) call x%sync()
+ do k = 1, n
+ x%combuf(k+i-1) = x%v(idx%v(k+i-1))
+ end do
+ end if
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_ipk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine i_oacc_gthzbuf
-
+
subroutine i_oacc_gthzv_x(i, n, idx, x, y)
use psb_base_mod
implicit none
@@ -166,24 +226,41 @@ contains
class(psb_i_base_vect_type):: idx
integer(psb_ipk_) :: y(:)
class(psb_i_vect_oacc):: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
info = 0
-
- select type(ii => idx)
+ acc_done = .false.
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'i_oacc_gthzv_x')
- return
+ if (x%is_host()) call x%sync()
+ if (acc_is_present(y)) then
+ call inner_gth(n,x%v,y(i:),ii%v(i:))
+ acc_done=.true.
+ end if
end select
-
- if (x%is_host()) call x%sync()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
+ if (.not.acc_done) then
+ if (x%is_dev()) call x%sync()
+ if (idx%is_dev()) call idx%sync()
+ do k = 1, n
+ y(k+i-1) = x%v(idx%v(k+i-1))
+ !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
+ end do
+ end if
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_ipk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine i_oacc_gthzv_x
subroutine i_oacc_ins_v(n, irl, val, dupl, x, info)
@@ -366,6 +443,17 @@ contains
end function i_oacc_get_fmt
+ subroutine i_oacc_new_buffer(n,x,info)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+ if (n /= psb_size(x%combuf)) then
+ call x%psb_i_base_vect_type%new_buffer(n,info)
+ !$acc enter data copyin(x%combuf)
+ end if
+ end subroutine i_oacc_new_buffer
+
subroutine i_oacc_sync_dev_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
@@ -464,12 +552,33 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
- if (allocated(x%v)) then
- if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- deallocate(x%v, stat=info)
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_i_base_vect_type%free(info)
end subroutine i_oacc_vect_free
+
+ subroutine i_oacc_vect_maybe_free_buffer(x,info)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (psb_oacc_get_maybe_free_buffer())&
+ & call x%free_buffer(info)
+
+ end subroutine i_oacc_vect_maybe_free_buffer
+
+ subroutine i_oacc_vect_free_buffer(x,info)
+ implicit none
+ class(psb_i_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_i_base_vect_type%free_buffer(info)
+ end subroutine i_oacc_vect_free_buffer
+
function i_oacc_get_size(x) result(res)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index eb9b2b9a..5526796f 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -3,6 +3,8 @@ module psb_l_oacc_vect_mod
use openacc
use psb_const_mod
use psb_error_mod
+ use psb_realloc_mod
+ use psb_oacc_env_mod
use psb_l_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
@@ -26,6 +28,8 @@ module psb_l_oacc_vect_mod
procedure, pass(x) :: bld_x => l_oacc_bld_x
procedure, pass(x) :: bld_mn => l_oacc_bld_mn
procedure, pass(x) :: free => l_oacc_vect_free
+ procedure, pass(x) :: free_buffer => l_oacc_vect_free_buffer
+ procedure, pass(x) :: maybe_free_buffer => l_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => l_oacc_ins_a
procedure, pass(x) :: ins_v => l_oacc_ins_v
procedure, pass(x) :: is_host => l_oacc_is_host
@@ -36,11 +40,13 @@ module psb_l_oacc_vect_mod
procedure, pass(x) :: set_sync => l_oacc_set_sync
procedure, pass(x) :: set_scal => l_oacc_set_scal
+ procedure, pass(x) :: new_buffer => l_oacc_new_buffer
procedure, pass(x) :: gthzv_x => l_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => l_oacc_gthzbuf
+ procedure, pass(x) :: gthzbuf => l_oacc_gthzbuf
procedure, pass(y) :: sctb => l_oacc_sctb
procedure, pass(y) :: sctb_x => l_oacc_sctb_x
procedure, pass(y) :: sctb_buf => l_oacc_sctb_buf
+ procedure, nopass :: device_wait => l_oacc_device_wait
procedure, pass(x) :: get_size => l_oacc_get_size
@@ -50,6 +56,11 @@ module psb_l_oacc_vect_mod
contains
+ subroutine l_oacc_device_wait()
+ implicit none
+ call acc_wait_all()
+ end subroutine l_oacc_device_wait
+
subroutine l_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
@@ -58,29 +69,44 @@ contains
class(psb_i_base_vect_type) :: idx
integer(psb_lpk_) :: beta
class(psb_l_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
+ !$acc update device(y%combuf)
+ call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
+ call y%set_dev()
+ acc_done = .true.
+ end select
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do
+ end if
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_lpk_) :: beta,x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
end do
- end select
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine l_oacc_sctb_buf
subroutine l_oacc_sctb_x(i, n, idx, x, beta, y)
@@ -90,24 +116,41 @@ contains
class(psb_i_base_vect_type) :: idx
integer(psb_lpk_) :: beta, x(:)
class(psb_l_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
+ integer(psb_ipk_) :: info, ni, k
+ logical :: acc_done
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'l_oacc_sctb_x')
- return
+ if (y%is_host()) call y%sync()
+ if (acc_is_present(x)) then
+ call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+ acc_done = .true.
+ call y%set_dev()
+ end if
end select
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
+ end do
+ call y%set_host()
+ end if
- if (y%is_host()) call y%sync()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_lpk_) :: beta, x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
+ end do
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine l_oacc_sctb_x
subroutine l_oacc_sctb(n, idx, x, beta, y)
@@ -123,7 +166,6 @@ contains
if (n == 0) return
if (y%is_dev()) call y%sync()
- !$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
@@ -137,30 +179,48 @@ contains
integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx
class(psb_l_vect_oacc) :: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info,k
+ logical :: acc_done
info = 0
+ acc_done = .false.
+
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
- select type(ii => idx)
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'l_oacc_gthzbuf')
- return
+ if (x%is_host()) call x%sync()
+ call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
+ acc_done = .true.
end select
- if (x%is_host()) call x%sync()
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (x%is_dev()) call x%sync()
+ do k = 1, n
+ x%combuf(k+i-1) = x%v(idx%v(k+i-1))
+ end do
+ end if
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_lpk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine l_oacc_gthzbuf
-
+
subroutine l_oacc_gthzv_x(i, n, idx, x, y)
use psb_base_mod
implicit none
@@ -168,24 +228,41 @@ contains
class(psb_i_base_vect_type):: idx
integer(psb_lpk_) :: y(:)
class(psb_l_vect_oacc):: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
info = 0
-
- select type(ii => idx)
+ acc_done = .false.
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'l_oacc_gthzv_x')
- return
+ if (x%is_host()) call x%sync()
+ if (acc_is_present(y)) then
+ call inner_gth(n,x%v,y(i:),ii%v(i:))
+ acc_done=.true.
+ end if
end select
-
- if (x%is_host()) call x%sync()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
+ if (.not.acc_done) then
+ if (x%is_dev()) call x%sync()
+ if (idx%is_dev()) call idx%sync()
+ do k = 1, n
+ y(k+i-1) = x%v(idx%v(k+i-1))
+ !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
+ end do
+ end if
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ integer(psb_lpk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine l_oacc_gthzv_x
subroutine l_oacc_ins_v(n, irl, val, dupl, x, info)
@@ -368,6 +445,17 @@ contains
end function l_oacc_get_fmt
+ subroutine l_oacc_new_buffer(n,x,info)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+ if (n /= psb_size(x%combuf)) then
+ call x%psb_l_base_vect_type%new_buffer(n,info)
+ !$acc enter data copyin(x%combuf)
+ end if
+ end subroutine l_oacc_new_buffer
+
subroutine l_oacc_sync_dev_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
@@ -466,12 +554,33 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
- if (allocated(x%v)) then
- if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- deallocate(x%v, stat=info)
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_l_base_vect_type%free(info)
end subroutine l_oacc_vect_free
+
+ subroutine l_oacc_vect_maybe_free_buffer(x,info)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (psb_oacc_get_maybe_free_buffer())&
+ & call x%free_buffer(info)
+
+ end subroutine l_oacc_vect_maybe_free_buffer
+
+ subroutine l_oacc_vect_free_buffer(x,info)
+ implicit none
+ class(psb_l_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_l_base_vect_type%free_buffer(info)
+ end subroutine l_oacc_vect_free_buffer
+
function l_oacc_get_size(x) result(res)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90
index 83c9426d..dc01ad3a 100644
--- a/openacc/psb_oacc_env_mod.F90
+++ b/openacc/psb_oacc_env_mod.F90
@@ -1,18 +1,29 @@
module psb_oacc_env_mod
-contains
+ use psb_penv_mod
+ use psb_const_mod
+ use psb_error_mod
+ logical, private :: oacc_do_maybe_free_buffer = .false.
- subroutine psb_oacc_init(ctxt, dev)
- use psb_penv_mod
- use psb_const_mod
- use psb_error_mod
- type(psb_ctxt_type), intent(in) :: ctxt
- integer, intent(in), optional :: dev
+contains
+ function psb_oacc_get_maybe_free_buffer() result(res)
+ logical :: res
+ res = oacc_do_maybe_free_buffer
+ end function psb_oacc_get_maybe_free_buffer
- end subroutine psb_oacc_init
+ subroutine psb_oacc_set_maybe_free_buffer(val)
+ logical, intent(in) :: val
+ oacc_do_maybe_free_buffer = val
+ end subroutine psb_oacc_set_maybe_free_buffer
- subroutine psb_oacc_exit()
- integer :: res
+ subroutine psb_oacc_init(ctxt, dev)
+ type(psb_ctxt_type), intent(in) :: ctxt
+ integer, intent(in), optional :: dev
+
+ end subroutine psb_oacc_init
- end subroutine psb_oacc_exit
+ subroutine psb_oacc_exit()
+ integer :: res
+
+ end subroutine psb_oacc_exit
end module psb_oacc_env_mod
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 16b45461..b80108ab 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -3,6 +3,8 @@ module psb_s_oacc_vect_mod
use openacc
use psb_const_mod
use psb_error_mod
+ use psb_realloc_mod
+ use psb_oacc_env_mod
use psb_s_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
@@ -26,6 +28,8 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: bld_x => s_oacc_bld_x
procedure, pass(x) :: bld_mn => s_oacc_bld_mn
procedure, pass(x) :: free => s_oacc_vect_free
+ procedure, pass(x) :: free_buffer => s_oacc_vect_free_buffer
+ procedure, pass(x) :: maybe_free_buffer => s_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => s_oacc_ins_a
procedure, pass(x) :: ins_v => s_oacc_ins_v
procedure, pass(x) :: is_host => s_oacc_is_host
@@ -36,11 +40,13 @@ module psb_s_oacc_vect_mod
procedure, pass(x) :: set_sync => s_oacc_set_sync
procedure, pass(x) :: set_scal => s_oacc_set_scal
+ procedure, pass(x) :: new_buffer => s_oacc_new_buffer
procedure, pass(x) :: gthzv_x => s_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => s_oacc_gthzbuf
+ procedure, pass(x) :: gthzbuf => s_oacc_gthzbuf
procedure, pass(y) :: sctb => s_oacc_sctb
procedure, pass(y) :: sctb_x => s_oacc_sctb_x
procedure, pass(y) :: sctb_buf => s_oacc_sctb_buf
+ procedure, nopass :: device_wait => s_oacc_device_wait
procedure, pass(x) :: get_size => s_oacc_get_size
@@ -87,6 +93,11 @@ module psb_s_oacc_vect_mod
contains
+ subroutine s_oacc_device_wait()
+ implicit none
+ call acc_wait_all()
+ end subroutine s_oacc_device_wait
+
subroutine s_oacc_absval1(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
@@ -181,13 +192,17 @@ contains
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
- end do
- sum = szero
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x(i)/mx)**2
end do
- res = mx*sqrt(sum)
+ if (mx == szero) then
+ res = mx
+ else
+ sum = szero
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end if
end function s_inner_oacc_nrm2
end function s_oacc_nrm2
@@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx
real(psb_spk_) :: beta
class(psb_s_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
+ !$acc update device(y%combuf)
+ call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
+ call y%set_dev()
+ acc_done = .true.
+ end select
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do
+ end if
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_spk_) :: beta,x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
end do
- end select
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine s_oacc_sctb_buf
subroutine s_oacc_sctb_x(i, n, idx, x, beta, y)
@@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx
real(psb_spk_) :: beta, x(:)
class(psb_s_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
+ integer(psb_ipk_) :: info, ni, k
+ logical :: acc_done
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 's_oacc_sctb_x')
- return
+ if (y%is_host()) call y%sync()
+ if (acc_is_present(x)) then
+ call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+ acc_done = .true.
+ call y%set_dev()
+ end if
end select
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
+ end do
+ call y%set_host()
+ end if
- if (y%is_host()) call y%sync()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_spk_) :: beta, x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
+ end do
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine s_oacc_sctb_x
subroutine s_oacc_sctb(n, idx, x, beta, y)
@@ -463,7 +510,6 @@ contains
if (n == 0) return
if (y%is_dev()) call y%sync()
- !$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
@@ -477,30 +523,48 @@ contains
integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx
class(psb_s_vect_oacc) :: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info,k
+ logical :: acc_done
info = 0
+ acc_done = .false.
+
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
- select type(ii => idx)
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 's_oacc_gthzbuf')
- return
+ if (x%is_host()) call x%sync()
+ call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
+ acc_done = .true.
end select
- if (x%is_host()) call x%sync()
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (x%is_dev()) call x%sync()
+ do k = 1, n
+ x%combuf(k+i-1) = x%v(idx%v(k+i-1))
+ end do
+ end if
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_spk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine s_oacc_gthzbuf
-
+
subroutine s_oacc_gthzv_x(i, n, idx, x, y)
use psb_base_mod
implicit none
@@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx
real(psb_spk_) :: y(:)
class(psb_s_vect_oacc):: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
info = 0
-
- select type(ii => idx)
+ acc_done = .false.
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 's_oacc_gthzv_x')
- return
+ if (x%is_host()) call x%sync()
+ if (acc_is_present(y)) then
+ call inner_gth(n,x%v,y(i:),ii%v(i:))
+ acc_done=.true.
+ end if
end select
-
- if (x%is_host()) call x%sync()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
+ if (.not.acc_done) then
+ if (x%is_dev()) call x%sync()
+ if (idx%is_dev()) call idx%sync()
+ do k = 1, n
+ y(k+i-1) = x%v(idx%v(k+i-1))
+ !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
+ end do
+ end if
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ real(psb_spk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine s_oacc_gthzv_x
subroutine s_oacc_ins_v(n, irl, val, dupl, x, info)
@@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info
res = szero
- !write(0,*) 'dot_v'
+!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -762,6 +843,17 @@ contains
end function s_oacc_dot_a
+ subroutine s_oacc_new_buffer(n,x,info)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+ if (n /= psb_size(x%combuf)) then
+ call x%psb_s_base_vect_type%new_buffer(n,info)
+ !$acc enter data copyin(x%combuf)
+ end if
+ end subroutine s_oacc_new_buffer
+
subroutine s_oacc_sync_dev_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
@@ -860,12 +952,33 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
- if (allocated(x%v)) then
- if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- deallocate(x%v, stat=info)
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_s_base_vect_type%free(info)
end subroutine s_oacc_vect_free
+
+ subroutine s_oacc_vect_maybe_free_buffer(x,info)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (psb_oacc_get_maybe_free_buffer())&
+ & call x%free_buffer(info)
+ end subroutine s_oacc_vect_maybe_free_buffer
+
+ subroutine s_oacc_vect_free_buffer(x,info)
+ implicit none
+ class(psb_s_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_s_base_vect_type%free_buffer(info)
+
+ end subroutine s_oacc_vect_free_buffer
+
function s_oacc_get_size(x) result(res)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 9e6bbb2d..86107c31 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -3,6 +3,8 @@ module psb_z_oacc_vect_mod
use openacc
use psb_const_mod
use psb_error_mod
+ use psb_realloc_mod
+ use psb_oacc_env_mod
use psb_z_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
@@ -26,6 +28,8 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: bld_x => z_oacc_bld_x
procedure, pass(x) :: bld_mn => z_oacc_bld_mn
procedure, pass(x) :: free => z_oacc_vect_free
+ procedure, pass(x) :: free_buffer => z_oacc_vect_free_buffer
+ procedure, pass(x) :: maybe_free_buffer => z_oacc_vect_maybe_free_buffer
procedure, pass(x) :: ins_a => z_oacc_ins_a
procedure, pass(x) :: ins_v => z_oacc_ins_v
procedure, pass(x) :: is_host => z_oacc_is_host
@@ -36,11 +40,13 @@ module psb_z_oacc_vect_mod
procedure, pass(x) :: set_sync => z_oacc_set_sync
procedure, pass(x) :: set_scal => z_oacc_set_scal
+ procedure, pass(x) :: new_buffer => z_oacc_new_buffer
procedure, pass(x) :: gthzv_x => z_oacc_gthzv_x
- procedure, pass(x) :: gthzbuf_x => z_oacc_gthzbuf
+ procedure, pass(x) :: gthzbuf => z_oacc_gthzbuf
procedure, pass(y) :: sctb => z_oacc_sctb
procedure, pass(y) :: sctb_x => z_oacc_sctb_x
procedure, pass(y) :: sctb_buf => z_oacc_sctb_buf
+ procedure, nopass :: device_wait => z_oacc_device_wait
procedure, pass(x) :: get_size => z_oacc_get_size
@@ -87,6 +93,11 @@ module psb_z_oacc_vect_mod
contains
+ subroutine z_oacc_device_wait()
+ implicit none
+ call acc_wait_all()
+ end subroutine z_oacc_device_wait
+
subroutine z_oacc_absval1(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
@@ -181,13 +192,17 @@ contains
!$acc parallel loop reduction(max:mx)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
- end do
- sum = dzero
- !$acc parallel loop reduction(+:sum)
- do i = 1, n
- sum = sum + abs(x(i)/mx)**2
end do
- res = mx*sqrt(sum)
+ if (mx == dzero) then
+ res = mx
+ else
+ sum = dzero
+ !$acc parallel loop reduction(+:sum)
+ do i = 1, n
+ sum = sum + abs(x(i)/mx)**2
+ end do
+ res = mx*sqrt(sum)
+ end if
end function z_inner_oacc_nrm2
end function z_oacc_nrm2
@@ -398,29 +413,44 @@ contains
class(psb_i_base_vect_type) :: idx
complex(psb_dpk_) :: beta
class(psb_z_vect_oacc) :: y
- integer(psb_ipk_) :: info
-
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
+ !$acc update device(y%combuf)
+ call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
+ call y%set_dev()
+ acc_done = .true.
+ end select
- !$acc parallel loop
- do i = 1, n
- y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + y%combuf(k)
end do
+ end if
- class default
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_dpk_) :: beta,x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
end do
- end select
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine z_oacc_sctb_buf
subroutine z_oacc_sctb_x(i, n, idx, x, beta, y)
@@ -430,24 +460,41 @@ contains
class(psb_i_base_vect_type) :: idx
complex(psb_dpk_) :: beta, x(:)
class(psb_z_vect_oacc) :: y
- integer(psb_ipk_) :: info, ni
+ integer(psb_ipk_) :: info, ni, k
+ logical :: acc_done
+ acc_done = .false.
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'z_oacc_sctb_x')
- return
+ if (y%is_host()) call y%sync()
+ if (acc_is_present(x)) then
+ call inner_sctb(n,x(i:i+n-1),beta,y%v,idx%v(i:i+n-1))
+ acc_done = .true.
+ call y%set_dev()
+ end if
end select
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (y%is_dev()) call y%sync()
+ do k = 1, n
+ y%v(idx%v(k+i-1)) = beta * y%v(idx%v(k+i-1)) + x(k+i-1)
+ end do
+ call y%set_host()
+ end if
- if (y%is_host()) call y%sync()
-
- !$acc parallel loop
- do i = 1, n
- y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
- end do
-
- call y%set_dev()
+ contains
+ subroutine inner_sctb(n,x,beta,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_dpk_) :: beta, x(:), y(:)
+ integer(psb_ipk_) :: k
+ !$acc parallel loop
+ do k = 1, n
+ y(idx(k)) = x(k) + beta *y(idx(k))
+ end do
+ !$acc end parallel loop
+ end subroutine inner_sctb
+
end subroutine z_oacc_sctb_x
subroutine z_oacc_sctb(n, idx, x, beta, y)
@@ -463,7 +510,6 @@ contains
if (n == 0) return
if (y%is_dev()) call y%sync()
- !$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
@@ -477,30 +523,48 @@ contains
integer(psb_ipk_) :: i, n
class(psb_i_base_vect_type) :: idx
class(psb_z_vect_oacc) :: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info,k
+ logical :: acc_done
info = 0
+ acc_done = .false.
+
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
- select type(ii => idx)
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'z_oacc_gthzbuf')
- return
+ if (x%is_host()) call x%sync()
+ call inner_gth(n,x%v,x%combuf(i:i+n-1),ii%v(i:i+n-1))
+ acc_done = .true.
end select
- if (x%is_host()) call x%sync()
+ if (.not.acc_done) then
+ if (idx%is_dev()) call idx%sync()
+ if (x%is_dev()) call x%sync()
+ do k = 1, n
+ x%combuf(k+i-1) = x%v(idx%v(k+i-1))
+ end do
+ end if
- !$acc parallel loop
- do i = 1, n
- x%combuf(i) = x%v(idx%v(i))
- end do
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_dpk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine z_oacc_gthzbuf
-
+
subroutine z_oacc_gthzv_x(i, n, idx, x, y)
use psb_base_mod
implicit none
@@ -508,24 +572,41 @@ contains
class(psb_i_base_vect_type):: idx
complex(psb_dpk_) :: y(:)
class(psb_z_vect_oacc):: x
- integer(psb_ipk_) :: info
+ integer(psb_ipk_) :: info, k
+ logical :: acc_done
info = 0
-
- select type(ii => idx)
+ acc_done = .false.
+ select type (ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
- class default
- call psb_errpush(info, 'z_oacc_gthzv_x')
- return
+ if (x%is_host()) call x%sync()
+ if (acc_is_present(y)) then
+ call inner_gth(n,x%v,y(i:),ii%v(i:))
+ acc_done=.true.
+ end if
end select
-
- if (x%is_host()) call x%sync()
-
- !$acc parallel loop
- do i = 1, n
- y(i) = x%v(idx%v(i))
- end do
+ if (.not.acc_done) then
+ if (x%is_dev()) call x%sync()
+ if (idx%is_dev()) call idx%sync()
+ do k = 1, n
+ y(k+i-1) = x%v(idx%v(k+i-1))
+ !write(0,*) 'oa gthzv ',k+i-1,idx%v(k+i-1),k,y(k)
+ end do
+ end if
+ contains
+ subroutine inner_gth(n,x,y,idx)
+ integer(psb_ipk_) :: n, idx(:)
+ complex(psb_dpk_) :: x(:), y(:)
+ integer(psb_ipk_) :: k
+
+ !$acc parallel loop present(y)
+ do k = 1, n
+ y(k) = x(idx(k))
+ end do
+ !$acc end parallel loop
+ !$acc update self(y)
+ end subroutine inner_gth
end subroutine z_oacc_gthzv_x
subroutine z_oacc_ins_v(n, irl, val, dupl, x, info)
@@ -718,7 +799,7 @@ contains
integer(psb_ipk_) :: info
res = zzero
- !write(0,*) 'dot_v'
+!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync()
@@ -762,6 +843,17 @@ contains
end function z_oacc_dot_a
+ subroutine z_oacc_new_buffer(n,x,info)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(in) :: n
+ integer(psb_ipk_), intent(out) :: info
+ if (n /= psb_size(x%combuf)) then
+ call x%psb_z_base_vect_type%new_buffer(n,info)
+ !$acc enter data copyin(x%combuf)
+ end if
+ end subroutine z_oacc_new_buffer
+
subroutine z_oacc_sync_dev_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
@@ -860,12 +952,33 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
- if (allocated(x%v)) then
- if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- deallocate(x%v, stat=info)
- end if
+ if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_z_base_vect_type%free(info)
end subroutine z_oacc_vect_free
+
+ subroutine z_oacc_vect_maybe_free_buffer(x,info)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (psb_oacc_get_maybe_free_buffer())&
+ & call x%free_buffer(info)
+ end subroutine z_oacc_vect_maybe_free_buffer
+
+ subroutine z_oacc_vect_free_buffer(x,info)
+ implicit none
+ class(psb_z_vect_oacc), intent(inout) :: x
+ integer(psb_ipk_), intent(out) :: info
+
+ info = 0
+ if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ call x%psb_z_base_vect_type%free_buffer(info)
+
+ end subroutine z_oacc_vect_free_buffer
+
function z_oacc_get_size(x) result(res)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
From 096bce08c138df418648aef5fd5abeef1364c455 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 11 Sep 2024 12:43:46 +0200
Subject: [PATCH 39/86] Merged changes from V4 OpenACC
---
openacc/psb_c_oacc_csr_mat_mod.F90 | 40 ++++++++++++++-------
openacc/psb_c_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++---------
openacc/psb_c_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++-----------
openacc/psb_d_oacc_csr_mat_mod.F90 | 40 ++++++++++++++-------
openacc/psb_d_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++---------
openacc/psb_d_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++-----------
openacc/psb_s_oacc_csr_mat_mod.F90 | 40 ++++++++++++++-------
openacc/psb_s_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++---------
openacc/psb_s_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++-----------
openacc/psb_z_oacc_csr_mat_mod.F90 | 40 ++++++++++++++-------
openacc/psb_z_oacc_ell_mat_mod.F90 | 48 ++++++++++++++++---------
openacc/psb_z_oacc_hll_mat_mod.F90 | 56 +++++++++++++++++++-----------
12 files changed, 376 insertions(+), 200 deletions(-)
diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90
index 59794335..94edc5e9 100644
--- a/openacc/psb_c_oacc_csr_mat_mod.F90
+++ b/openacc/psb_c_oacc_csr_mat_mod.F90
@@ -139,9 +139,14 @@ contains
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp)
return
end subroutine c_oacc_csr_free_dev_space
@@ -246,9 +251,15 @@ contains
subroutine c_oacc_csr_sync_dev_space(a)
implicit none
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irp)) call acc_create(a%irp)
+
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irp)>0) call acc_create(a%irp)
end subroutine c_oacc_csr_sync_dev_space
subroutine c_oacc_csr_sync(a)
@@ -258,14 +269,19 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irp)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_self(a%irp)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irp)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine c_oacc_csr_sync
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
index f0559d26..f0b9779b 100644
--- a/openacc/psb_c_oacc_ell_mat_mod.F90
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -138,11 +138,15 @@ contains
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
return
end subroutine c_oacc_ell_free_dev_space
@@ -177,10 +181,15 @@ contains
implicit none
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
end subroutine c_oacc_ell_sync_dev_space
function c_oacc_ell_is_host(a) result(res)
@@ -241,16 +250,21 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine c_oacc_ell_sync
diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90
index 93436224..98c6a2ee 100644
--- a/openacc/psb_c_oacc_hll_mat_mod.F90
+++ b/openacc/psb_c_oacc_hll_mat_mod.F90
@@ -138,12 +138,16 @@ contains
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
- if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs)
return
end subroutine c_oacc_hll_free_dev_space
@@ -231,11 +235,16 @@ contains
implicit none
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
- if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
end subroutine c_oacc_hll_sync_dev_space
@@ -246,18 +255,23 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
- call acc_update_self(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
- call acc_update_device(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine c_oacc_hll_sync
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index 08b75575..a3119b64 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -139,9 +139,14 @@ contains
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp)
return
end subroutine d_oacc_csr_free_dev_space
@@ -246,9 +251,15 @@ contains
subroutine d_oacc_csr_sync_dev_space(a)
implicit none
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irp)) call acc_create(a%irp)
+
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irp)>0) call acc_create(a%irp)
end subroutine d_oacc_csr_sync_dev_space
subroutine d_oacc_csr_sync(a)
@@ -258,14 +269,19 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irp)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_self(a%irp)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irp)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine d_oacc_csr_sync
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 3e25f576..3932e286 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -138,11 +138,15 @@ contains
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
return
end subroutine d_oacc_ell_free_dev_space
@@ -177,10 +181,15 @@ contains
implicit none
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
end subroutine d_oacc_ell_sync_dev_space
function d_oacc_ell_is_host(a) result(res)
@@ -241,16 +250,21 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine d_oacc_ell_sync
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
index 084987e5..fd0fe54f 100644
--- a/openacc/psb_d_oacc_hll_mat_mod.F90
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -138,12 +138,16 @@ contains
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
- if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs)
return
end subroutine d_oacc_hll_free_dev_space
@@ -231,11 +235,16 @@ contains
implicit none
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
- if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
end subroutine d_oacc_hll_sync_dev_space
@@ -246,18 +255,23 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
- call acc_update_self(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
- call acc_update_device(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine d_oacc_hll_sync
diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90
index 16668d25..5eaf80f7 100644
--- a/openacc/psb_s_oacc_csr_mat_mod.F90
+++ b/openacc/psb_s_oacc_csr_mat_mod.F90
@@ -139,9 +139,14 @@ contains
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp)
return
end subroutine s_oacc_csr_free_dev_space
@@ -246,9 +251,15 @@ contains
subroutine s_oacc_csr_sync_dev_space(a)
implicit none
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irp)) call acc_create(a%irp)
+
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irp)>0) call acc_create(a%irp)
end subroutine s_oacc_csr_sync_dev_space
subroutine s_oacc_csr_sync(a)
@@ -258,14 +269,19 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irp)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_self(a%irp)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irp)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine s_oacc_csr_sync
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
index dcfc1850..56775879 100644
--- a/openacc/psb_s_oacc_ell_mat_mod.F90
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -138,11 +138,15 @@ contains
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
return
end subroutine s_oacc_ell_free_dev_space
@@ -177,10 +181,15 @@ contains
implicit none
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
end subroutine s_oacc_ell_sync_dev_space
function s_oacc_ell_is_host(a) result(res)
@@ -241,16 +250,21 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine s_oacc_ell_sync
diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90
index 0e7362f2..997433a1 100644
--- a/openacc/psb_s_oacc_hll_mat_mod.F90
+++ b/openacc/psb_s_oacc_hll_mat_mod.F90
@@ -138,12 +138,16 @@ contains
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
- if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs)
return
end subroutine s_oacc_hll_free_dev_space
@@ -231,11 +235,16 @@ contains
implicit none
class(psb_s_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
- if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
end subroutine s_oacc_hll_sync_dev_space
@@ -246,18 +255,23 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
- call acc_update_self(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
- call acc_update_device(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine s_oacc_hll_sync
diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90
index fdb59b4b..ed92373d 100644
--- a/openacc/psb_z_oacc_csr_mat_mod.F90
+++ b/openacc/psb_z_oacc_csr_mat_mod.F90
@@ -139,9 +139,14 @@ contains
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irp)) call acc_delete_finalize(a%irp)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irp)>0) call acc_delete_finalize(a%irp)
return
end subroutine z_oacc_csr_free_dev_space
@@ -246,9 +251,15 @@ contains
subroutine z_oacc_csr_sync_dev_space(a)
implicit none
class(psb_z_oacc_csr_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irp)) call acc_create(a%irp)
+
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irp)>0) call acc_create(a%irp)
end subroutine z_oacc_csr_sync_dev_space
subroutine z_oacc_csr_sync(a)
@@ -258,14 +269,19 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irp)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_self(a%irp)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irp)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irp)>0) call acc_update_device(a%irp)
end if
call tmpa%set_sync()
end subroutine z_oacc_csr_sync
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
index 9d09d43d..d494922f 100644
--- a/openacc/psb_z_oacc_ell_mat_mod.F90
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -138,11 +138,15 @@ contains
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
return
end subroutine z_oacc_ell_free_dev_space
@@ -177,10 +181,15 @@ contains
implicit none
class(psb_z_oacc_ell_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
end subroutine z_oacc_ell_sync_dev_space
function z_oacc_ell_is_host(a) result(res)
@@ -241,16 +250,21 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
end if
call tmpa%set_sync()
end subroutine z_oacc_ell_sync
diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90
index 9eb7d08c..07739348 100644
--- a/openacc/psb_z_oacc_hll_mat_mod.F90
+++ b/openacc/psb_z_oacc_hll_mat_mod.F90
@@ -138,12 +138,16 @@ contains
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
integer(psb_ipk_) :: info
- if (allocated(a%val)) call acc_delete_finalize(a%val)
- if (allocated(a%ja)) call acc_delete_finalize(a%ja)
- if (allocated(a%irn)) call acc_delete_finalize(a%irn)
- if (allocated(a%idiag)) call acc_delete_finalize(a%idiag)
- if (allocated(a%hkoffs)) call acc_delete_finalize(a%hkoffs)
-
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_delete_finalize(a%val)
+ if (psb_size(a%ja)>0) call acc_delete_finalize(a%ja)
+ if (psb_size(a%irn)>0) call acc_delete_finalize(a%irn)
+ if (psb_size(a%idiag)>0) call acc_delete_finalize(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_delete_finalize(a%hkoffs)
return
end subroutine z_oacc_hll_free_dev_space
@@ -231,11 +235,16 @@ contains
implicit none
class(psb_z_oacc_hll_sparse_mat), intent(inout) :: a
- if (allocated(a%val)) call acc_create(a%val)
- if (allocated(a%ja)) call acc_create(a%ja)
- if (allocated(a%irn)) call acc_create(a%irn)
- if (allocated(a%idiag)) call acc_create(a%idiag)
- if (allocated(a%hkoffs)) call acc_create(a%hkoffs)
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
+ if (psb_size(a%val)>0) call acc_create(a%val)
+ if (psb_size(a%ja)>0) call acc_create(a%ja)
+ if (psb_size(a%irn)>0) call acc_create(a%irn)
+ if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
end subroutine z_oacc_hll_sync_dev_space
@@ -246,18 +255,23 @@ contains
integer(psb_ipk_) :: info
tmpa => a
+ !
+ ! Note: at least on GNU, if an array is allocated
+ ! but with size 0, then CREATE,UPDATE and DELETE
+ ! will fail
+ !
if (a%is_dev()) then
- call acc_update_self(a%val)
- call acc_update_self(a%ja)
- call acc_update_self(a%irn)
- call acc_update_self(a%idiag)
- call acc_update_self(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_self(a%val)
+ if (psb_size(a%ja)>0) call acc_update_self(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_self(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_self(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_self(a%hkoffs)
else if (a%is_host()) then
- call acc_update_device(a%val)
- call acc_update_device(a%ja)
- call acc_update_device(a%irn)
- call acc_update_device(a%idiag)
- call acc_update_device(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_update_device(a%val)
+ if (psb_size(a%ja)>0) call acc_update_device(a%ja)
+ if (psb_size(a%irn)>0) call acc_update_device(a%irn)
+ if (psb_size(a%idiag)>0) call acc_update_device(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_update_device(a%hkoffs)
end if
call tmpa%set_sync()
end subroutine z_oacc_hll_sync
From e5504ddddc39bb4d2f6dbf1a1fcc6bcb6af58653 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Thu, 12 Sep 2024 15:08:21 +0200
Subject: [PATCH 40/86] Fix memory traffic in GTH/SCT
---
openacc/psb_c_oacc_vect_mod.F90 | 7 ++++---
openacc/psb_d_oacc_vect_mod.F90 | 7 ++++---
openacc/psb_i_oacc_vect_mod.F90 | 7 ++++---
openacc/psb_l_oacc_vect_mod.F90 | 7 ++++---
openacc/psb_s_oacc_vect_mod.F90 | 7 ++++---
openacc/psb_z_oacc_vect_mod.F90 | 7 ++++---
6 files changed, 24 insertions(+), 18 deletions(-)
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 4e2cca9e..e479f58d 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -425,7 +425,6 @@ contains
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
- !$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
@@ -444,6 +443,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -488,6 +488,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -561,7 +562,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine c_oacc_gthzbuf
@@ -605,7 +606,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine c_oacc_gthzv_x
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 80ac35f7..7fd2a441 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -425,7 +425,6 @@ contains
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
- !$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
@@ -444,6 +443,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -488,6 +488,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -561,7 +562,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine d_oacc_gthzbuf
@@ -605,7 +606,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine d_oacc_gthzv_x
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index cfd0c210..455453a1 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -79,7 +79,6 @@ contains
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
- !$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
@@ -98,6 +97,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -142,6 +142,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -215,7 +216,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine i_oacc_gthzbuf
@@ -259,7 +260,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine i_oacc_gthzv_x
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index 5526796f..d35e9141 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -81,7 +81,6 @@ contains
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
- !$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
@@ -100,6 +99,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -144,6 +144,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -217,7 +218,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine l_oacc_gthzbuf
@@ -261,7 +262,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine l_oacc_gthzv_x
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index b80108ab..87eeccea 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -425,7 +425,6 @@ contains
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
- !$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
@@ -444,6 +443,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -488,6 +488,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -561,7 +562,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine s_oacc_gthzbuf
@@ -605,7 +606,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine s_oacc_gthzv_x
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 86107c31..0fe1adaa 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -425,7 +425,6 @@ contains
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync()
if (y%is_host()) call y%sync()
- !$acc update device(y%combuf)
call inner_sctb(n,y%combuf(i:i+n-1),beta,y%v,ii%v(i:i+n-1))
call y%set_dev()
acc_done = .true.
@@ -444,6 +443,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -488,6 +488,7 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
+ !$acc update device(x(1:n)) async
!$acc parallel loop
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
@@ -561,7 +562,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine z_oacc_gthzbuf
@@ -605,7 +606,7 @@ contains
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y)
+ !$acc update self(y(1:n)) async
end subroutine inner_gth
end subroutine z_oacc_gthzv_x
From 8ab5cef448a9dfb6759bdc37e65a48f0bcd3626e Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 4 Oct 2024 08:37:22 +0200
Subject: [PATCH 41/86] OpenACC environment fixes
---
openacc/psb_oacc_env_mod.F90 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90
index dc01ad3a..6d810f74 100644
--- a/openacc/psb_oacc_env_mod.F90
+++ b/openacc/psb_oacc_env_mod.F90
@@ -18,7 +18,7 @@ contains
subroutine psb_oacc_init(ctxt, dev)
type(psb_ctxt_type), intent(in) :: ctxt
integer, intent(in), optional :: dev
-
+ oacc_do_maybe_free_buffer = .false.
end subroutine psb_oacc_init
subroutine psb_oacc_exit()
From 108d544fc13ca17f04ebe9769fd77b35d1833a3e Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 4 Oct 2024 08:37:45 +0200
Subject: [PATCH 42/86] Fix clean_zeros to always keep the diagonal
---
base/serial/impl/psb_c_coo_impl.F90 | 8 ++++++--
base/serial/impl/psb_c_csc_impl.F90 | 6 +++++-
base/serial/impl/psb_c_csr_impl.F90 | 6 +++++-
base/serial/impl/psb_d_coo_impl.F90 | 8 ++++++--
base/serial/impl/psb_d_csc_impl.F90 | 6 +++++-
base/serial/impl/psb_d_csr_impl.F90 | 6 +++++-
base/serial/impl/psb_s_coo_impl.F90 | 8 ++++++--
base/serial/impl/psb_s_csc_impl.F90 | 6 +++++-
base/serial/impl/psb_s_csr_impl.F90 | 6 +++++-
base/serial/impl/psb_z_coo_impl.F90 | 8 ++++++--
base/serial/impl/psb_z_csc_impl.F90 | 6 +++++-
base/serial/impl/psb_z_csr_impl.F90 | 6 +++++-
12 files changed, 64 insertions(+), 16 deletions(-)
diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90
index ccf7e42d..2a6aaf13 100644
--- a/base/serial/impl/psb_c_coo_impl.F90
+++ b/base/serial/impl/psb_c_coo_impl.F90
@@ -595,12 +595,16 @@ subroutine psb_c_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= czero) then
+ cpy = (a%val(i) /= czero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90
index fe7227dd..f8ab1025 100644
--- a/base/serial/impl/psb_c_csc_impl.F90
+++ b/base/serial/impl/psb_c_csc_impl.F90
@@ -2412,6 +2412,7 @@ subroutine psb_c_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -2421,7 +2422,10 @@ subroutine psb_c_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= czero) then
+ cpy = (a%val(k) /= czero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ia(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90
index 276d3d1c..d3494f46 100644
--- a/base/serial/impl/psb_c_csr_impl.F90
+++ b/base/serial/impl/psb_c_csr_impl.F90
@@ -3633,6 +3633,7 @@ subroutine psb_c_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -3642,7 +3643,10 @@ subroutine psb_c_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= czero) then
+ cpy = (a%val(k) /= czero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ja(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90
index beb438aa..15d4c4ad 100644
--- a/base/serial/impl/psb_d_coo_impl.F90
+++ b/base/serial/impl/psb_d_coo_impl.F90
@@ -595,12 +595,16 @@ subroutine psb_d_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= dzero) then
+ cpy = (a%val(i) /= dzero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90
index d9fa2874..92a4c555 100644
--- a/base/serial/impl/psb_d_csc_impl.F90
+++ b/base/serial/impl/psb_d_csc_impl.F90
@@ -2412,6 +2412,7 @@ subroutine psb_d_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -2421,7 +2422,10 @@ subroutine psb_d_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= dzero) then
+ cpy = (a%val(k) /= dzero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ia(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90
index a0aaeeee..09dffe0e 100644
--- a/base/serial/impl/psb_d_csr_impl.F90
+++ b/base/serial/impl/psb_d_csr_impl.F90
@@ -3633,6 +3633,7 @@ subroutine psb_d_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -3642,7 +3643,10 @@ subroutine psb_d_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= dzero) then
+ cpy = (a%val(k) /= dzero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ja(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90
index d833cf96..217b39c2 100644
--- a/base/serial/impl/psb_s_coo_impl.F90
+++ b/base/serial/impl/psb_s_coo_impl.F90
@@ -595,12 +595,16 @@ subroutine psb_s_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= szero) then
+ cpy = (a%val(i) /= szero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90
index c135c9ef..ffa32f8f 100644
--- a/base/serial/impl/psb_s_csc_impl.F90
+++ b/base/serial/impl/psb_s_csc_impl.F90
@@ -2412,6 +2412,7 @@ subroutine psb_s_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -2421,7 +2422,10 @@ subroutine psb_s_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= szero) then
+ cpy = (a%val(k) /= szero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ia(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90
index 9d5dc145..5e18de7b 100644
--- a/base/serial/impl/psb_s_csr_impl.F90
+++ b/base/serial/impl/psb_s_csr_impl.F90
@@ -3633,6 +3633,7 @@ subroutine psb_s_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -3642,7 +3643,10 @@ subroutine psb_s_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= szero) then
+ cpy = (a%val(k) /= szero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ja(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90
index ea6b2492..89950c4c 100644
--- a/base/serial/impl/psb_z_coo_impl.F90
+++ b/base/serial/impl/psb_z_coo_impl.F90
@@ -595,12 +595,16 @@ subroutine psb_z_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= zzero) then
+ cpy = (a%val(i) /= zzero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90
index becb7003..a37e04e5 100644
--- a/base/serial/impl/psb_z_csc_impl.F90
+++ b/base/serial/impl/psb_z_csc_impl.F90
@@ -2412,6 +2412,7 @@ subroutine psb_z_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -2421,7 +2422,10 @@ subroutine psb_z_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= zzero) then
+ cpy = (a%val(k) /= zzero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ia(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90
index 7f11c3bd..57d31f80 100644
--- a/base/serial/impl/psb_z_csr_impl.F90
+++ b/base/serial/impl/psb_z_csr_impl.F90
@@ -3633,6 +3633,7 @@ subroutine psb_z_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
+ logical :: cpy
info = 0
call a%sync()
@@ -3642,7 +3643,10 @@ subroutine psb_z_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= zzero) then
+ cpy = (a%val(k) /= zzero)
+ ! Always keep the diagonal, even if numerically zero
+ if (.not.cpy) cpy = (i == a%ja(k))
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
From 1c235f928189169a9318d9027fd78f33740149ff Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 4 Oct 2024 10:28:52 +0200
Subject: [PATCH 43/86] Improve clean_zeros
---
base/serial/impl/psb_c_coo_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_c_csc_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_c_csr_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_d_coo_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_d_csc_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_d_csr_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_s_coo_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_s_csc_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_s_csr_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_z_coo_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_z_csc_impl.F90 | 20 +++++++++++++++-----
base/serial/impl/psb_z_csr_impl.F90 | 20 +++++++++++++++-----
12 files changed, 180 insertions(+), 60 deletions(-)
diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90
index 2a6aaf13..5b681d22 100644
--- a/base/serial/impl/psb_c_coo_impl.F90
+++ b/base/serial/impl/psb_c_coo_impl.F90
@@ -601,9 +601,12 @@ subroutine psb_c_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- cpy = (a%val(i) /= czero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (a%val(i) /= czero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
if (cpy) then
j = j + 1
a%val(j) = a%val(i)
@@ -5930,12 +5933,19 @@ subroutine psb_lc_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= czero) then
+ if (a%val(i) /= czero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90
index f8ab1025..190a4d5b 100644
--- a/base/serial/impl/psb_c_csc_impl.F90
+++ b/base/serial/impl/psb_c_csc_impl.F90
@@ -2422,9 +2422,12 @@ subroutine psb_c_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- cpy = (a%val(k) /= czero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ia(k))
+ if (a%val(k) /= czero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
@@ -4317,7 +4320,8 @@ subroutine psb_lc_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nc = a%get_ncols()
@@ -4326,7 +4330,13 @@ subroutine psb_lc_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= czero) then
+ if (a%val(k) /= czero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90
index d3494f46..0db9f3fa 100644
--- a/base/serial/impl/psb_c_csr_impl.F90
+++ b/base/serial/impl/psb_c_csr_impl.F90
@@ -3643,9 +3643,12 @@ subroutine psb_c_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- cpy = (a%val(k) /= czero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ja(k))
+ if (a%val(k) /= czero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
@@ -6556,7 +6559,8 @@ subroutine psb_lc_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nr = a%get_nrows()
@@ -6565,7 +6569,13 @@ subroutine psb_lc_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= czero) then
+ if (a%val(k) /= czero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90
index 15d4c4ad..a802775a 100644
--- a/base/serial/impl/psb_d_coo_impl.F90
+++ b/base/serial/impl/psb_d_coo_impl.F90
@@ -601,9 +601,12 @@ subroutine psb_d_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- cpy = (a%val(i) /= dzero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (a%val(i) /= dzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
if (cpy) then
j = j + 1
a%val(j) = a%val(i)
@@ -5930,12 +5933,19 @@ subroutine psb_ld_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= dzero) then
+ if (a%val(i) /= dzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90
index 92a4c555..61e2ad14 100644
--- a/base/serial/impl/psb_d_csc_impl.F90
+++ b/base/serial/impl/psb_d_csc_impl.F90
@@ -2422,9 +2422,12 @@ subroutine psb_d_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- cpy = (a%val(k) /= dzero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ia(k))
+ if (a%val(k) /= dzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
@@ -4317,7 +4320,8 @@ subroutine psb_ld_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nc = a%get_ncols()
@@ -4326,7 +4330,13 @@ subroutine psb_ld_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= dzero) then
+ if (a%val(k) /= dzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90
index 09dffe0e..56ba8c63 100644
--- a/base/serial/impl/psb_d_csr_impl.F90
+++ b/base/serial/impl/psb_d_csr_impl.F90
@@ -3643,9 +3643,12 @@ subroutine psb_d_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- cpy = (a%val(k) /= dzero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ja(k))
+ if (a%val(k) /= dzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
@@ -6556,7 +6559,8 @@ subroutine psb_ld_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nr = a%get_nrows()
@@ -6565,7 +6569,13 @@ subroutine psb_ld_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= dzero) then
+ if (a%val(k) /= dzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90
index 217b39c2..0979ff85 100644
--- a/base/serial/impl/psb_s_coo_impl.F90
+++ b/base/serial/impl/psb_s_coo_impl.F90
@@ -601,9 +601,12 @@ subroutine psb_s_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- cpy = (a%val(i) /= szero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (a%val(i) /= szero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
if (cpy) then
j = j + 1
a%val(j) = a%val(i)
@@ -5930,12 +5933,19 @@ subroutine psb_ls_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= szero) then
+ if (a%val(i) /= szero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90
index ffa32f8f..ca41d705 100644
--- a/base/serial/impl/psb_s_csc_impl.F90
+++ b/base/serial/impl/psb_s_csc_impl.F90
@@ -2422,9 +2422,12 @@ subroutine psb_s_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- cpy = (a%val(k) /= szero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ia(k))
+ if (a%val(k) /= szero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
@@ -4317,7 +4320,8 @@ subroutine psb_ls_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nc = a%get_ncols()
@@ -4326,7 +4330,13 @@ subroutine psb_ls_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= szero) then
+ if (a%val(k) /= szero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90
index 5e18de7b..323601ca 100644
--- a/base/serial/impl/psb_s_csr_impl.F90
+++ b/base/serial/impl/psb_s_csr_impl.F90
@@ -3643,9 +3643,12 @@ subroutine psb_s_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- cpy = (a%val(k) /= szero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ja(k))
+ if (a%val(k) /= szero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
@@ -6556,7 +6559,8 @@ subroutine psb_ls_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nr = a%get_nrows()
@@ -6565,7 +6569,13 @@ subroutine psb_ls_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= szero) then
+ if (a%val(k) /= szero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90
index 89950c4c..74c3f2cb 100644
--- a/base/serial/impl/psb_z_coo_impl.F90
+++ b/base/serial/impl/psb_z_coo_impl.F90
@@ -601,9 +601,12 @@ subroutine psb_z_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- cpy = (a%val(i) /= zzero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (a%ia(i) == a%ja(i))
+ if (a%val(i) /= zzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
if (cpy) then
j = j + 1
a%val(j) = a%val(i)
@@ -5930,12 +5933,19 @@ subroutine psb_lz_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
-
+ logical :: cpy
+
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= zzero) then
+ if (a%val(i) /= zzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (a%ia(i) == a%ja(i))
+ end if
+ if (cpy) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90
index a37e04e5..7ceff47f 100644
--- a/base/serial/impl/psb_z_csc_impl.F90
+++ b/base/serial/impl/psb_z_csc_impl.F90
@@ -2422,9 +2422,12 @@ subroutine psb_z_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- cpy = (a%val(k) /= zzero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ia(k))
+ if (a%val(k) /= zzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
@@ -4317,7 +4320,8 @@ subroutine psb_lz_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nc = a%get_ncols()
@@ -4326,7 +4330,13 @@ subroutine psb_lz_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= zzero) then
+ if (a%val(k) /= zzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ia(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90
index 57d31f80..54659def 100644
--- a/base/serial/impl/psb_z_csr_impl.F90
+++ b/base/serial/impl/psb_z_csr_impl.F90
@@ -3643,9 +3643,12 @@ subroutine psb_z_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- cpy = (a%val(k) /= zzero)
- ! Always keep the diagonal, even if numerically zero
- if (.not.cpy) cpy = (i == a%ja(k))
+ if (a%val(k) /= zzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
@@ -6556,7 +6559,8 @@ subroutine psb_lz_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
-
+ logical :: cpy
+
info = 0
call a%sync()
nr = a%get_nrows()
@@ -6565,7 +6569,13 @@ subroutine psb_lz_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= zzero) then
+ if (a%val(k) /= zzero) then
+ cpy = .true.
+ else
+ ! Always keep the diagonal, even if numerically zero
+ cpy = (i == a%ja(k))
+ end if
+ if (cpy) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
From 9601a837f59469a7a6c1518edeaca045217ec8d8 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 4 Oct 2024 11:18:39 +0200
Subject: [PATCH 44/86] Define --enable-cuda --with-cudadir for CUDA configry
---
config/pac.m4 | 27 ++++++++++++++++++++++++++-
configure | 38 ++++++++++++++++++++++++++++++--------
configure.ac | 4 ++++
3 files changed, 60 insertions(+), 9 deletions(-)
diff --git a/config/pac.m4 b/config/pac.m4
index 83356540..a39256e0 100644
--- a/config/pac.m4
+++ b/config/pac.m4
@@ -2157,6 +2157,31 @@ CPPFLAGS="$SAVE_CPPFLAGS"
+dnl @synopsis PAC_ARG_CUDA
+dnl
+dnl Test for --enable-cuda
+dnl
+dnl
+dnl
+dnl Example use:
+dnl
+dnl
+dnl @author Salvatore Filippone
+dnl
+AC_DEFUN([PAC_ARG_CUDA],
+[AC_MSG_CHECKING([whether we want cuda ])
+AC_ARG_ENABLE(cuda,
+AS_HELP_STRING([--enable-cuda],
+[Specify whether to enable cuda. ]),
+[
+pac_cv_cuda="yes";
+]
+dnl ,
+dnl [pac_cv_cuda="no";]
+ )
+]
+)
+
dnl @synopsis PAC_CHECK_CUDA
dnl
@@ -2173,7 +2198,7 @@ dnl
dnl @author Salvatore Filippone
dnl
AC_DEFUN(PAC_CHECK_CUDA,
-[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
+[AC_ARG_WITH(cudadir, AC_HELP_STRING([--with-cudadir=DIR], [Specify the CUDA install directory.]),
[pac_cv_cuda_dir=$withval],
[pac_cv_cuda_dir=''])
diff --git a/configure b/configure
index 3022ff53..dbc419fe 100755
--- a/configure
+++ b/configure
@@ -849,7 +849,8 @@ with_amd
with_amddir
with_amdincdir
with_amdlibdir
-with_cuda
+enable_cuda
+with_cudadir
with_cudacc
enable_openacc
with_extraopenacc
@@ -1506,6 +1507,7 @@ Optional Features:
--disable-silent-rules verbose build output (undo: "make V=0")
--enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP
+ --enable-cuda Specify whether to enable cuda.
--enable-openacc Specify whether to enable openacc.
--disable-openacc do not use Openacc
@@ -1550,7 +1552,7 @@ Optional Packages:
--with-amddir=DIR Specify the directory for AMD library and includes.
--with-amdincdir=DIR Specify the directory for AMD includes.
--with-amdlibdir=DIR Specify the directory for AMD library.
- --with-cuda=DIR Specify the CUDA install directory.
+ --with-cudadir=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75
--with-extraopenacc additional [EXTRAOPENACC] flags to be added: will
@@ -10628,12 +10630,26 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want cuda " >&5
+printf %s "checking whether we want cuda ... " >&6; }
+# Check whether --enable-cuda was given.
+if test ${enable_cuda+y}
+then :
+ enableval=$enable_cuda;
+pac_cv_cuda="yes";
+
+
+fi
-# Check whether --with-cuda was given.
-if test ${with_cuda+y}
+
+if test "x$pac_cv_cuda" == "xyes"; then
+
+
+# Check whether --with-cudadir was given.
+if test ${with_cudadir+y}
then :
- withval=$with_cuda; pac_cv_cuda_dir=$withval
+ withval=$with_cudadir; pac_cv_cuda_dir=$withval
else $as_nop
pac_cv_cuda_dir=''
fi
@@ -10719,10 +10735,10 @@ SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
-# Check whether --with-cuda was given.
-if test ${with_cuda+y}
+# Check whether --with-cudadir was given.
+if test ${with_cudadir+y}
then :
- withval=$with_cuda; pac_cv_cuda_dir=$withval
+ withval=$with_cudadir; pac_cv_cuda_dir=$withval
else $as_nop
pac_cv_cuda_dir=''
fi
@@ -10884,6 +10900,7 @@ fi
if test "x$pac_cv_ipk_size" != "x4"; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5
printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;}
+ pac_cv_cuda="no";
HAVE_CUDA="no";
CUDA_CC="";
SPGPU_LIBS="";
@@ -10893,6 +10910,7 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDA_INCLUDES="";
CUDA_LIBS="";
fi
+fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
printf %s "checking whether we want openacc ... " >&6; }
@@ -12741,6 +12759,8 @@ fi
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
+ OACCD : ${OACCD}
+ OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS}
@@ -12777,6 +12797,8 @@ printf "%s\n" "$as_me:
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
+ OACCD : ${OACCD}
+ OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS}
diff --git a/configure.ac b/configure.ac
index c702d4b1..5966801c 100755
--- a/configure.ac
+++ b/configure.ac
@@ -795,6 +795,8 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
fi
+PAC_ARG_CUDA()
+if test "x$pac_cv_cuda" == "xyes"; then
PAC_CHECK_CUDA()
@@ -834,6 +836,7 @@ fi
if test "x$pac_cv_ipk_size" != "x4"; then
AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU])
+ pac_cv_cuda="no";
HAVE_CUDA="no";
CUDA_CC="";
SPGPU_LIBS="";
@@ -843,6 +846,7 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDA_INCLUDES="";
CUDA_LIBS="";
fi
+fi
PAC_ARG_OPENACC()
dnl AC_ARG_ENABLE([openacc],
From 68f20c0e7a04467f00fcc2b6febbd891a2d61557 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Mon, 7 Oct 2024 12:44:45 +0200
Subject: [PATCH 45/86] Modify init
---
openacc/psb_oacc_env_mod.F90 | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/openacc/psb_oacc_env_mod.F90 b/openacc/psb_oacc_env_mod.F90
index dc01ad3a..6d810f74 100644
--- a/openacc/psb_oacc_env_mod.F90
+++ b/openacc/psb_oacc_env_mod.F90
@@ -18,7 +18,7 @@ contains
subroutine psb_oacc_init(ctxt, dev)
type(psb_ctxt_type), intent(in) :: ctxt
integer, intent(in), optional :: dev
-
+ oacc_do_maybe_free_buffer = .false.
end subroutine psb_oacc_init
subroutine psb_oacc_exit()
From 740609a4d8527de07a27d647f7c8b973142439aa Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Mon, 7 Oct 2024 12:45:18 +0200
Subject: [PATCH 46/86] Fix present() clauses
---
openacc/psb_c_oacc_vect_mod.F90 | 80 ++++++++++++++++++++-------------
openacc/psb_d_oacc_vect_mod.F90 | 80 ++++++++++++++++++++-------------
openacc/psb_i_oacc_vect_mod.F90 | 58 +++++++++++++++---------
openacc/psb_l_oacc_vect_mod.F90 | 58 +++++++++++++++---------
openacc/psb_s_oacc_vect_mod.F90 | 80 ++++++++++++++++++++-------------
openacc/psb_z_oacc_vect_mod.F90 | 80 ++++++++++++++++++++-------------
6 files changed, 266 insertions(+), 170 deletions(-)
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index e479f58d..40437184 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
complex(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
complex(psb_spk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
complex(psb_spk_), intent(in) :: alpha
complex(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
mx = szero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = szero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = szero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = szero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
complex(psb_spk_), intent(inout) :: y(:)
complex(psb_spk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine c_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine c_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_c_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine c_oacc_new_buffer
subroutine c_oacc_sync_dev_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine c_oacc_sync_dev_space
subroutine c_oacc_sync(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call c_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine c_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free(info)
end subroutine c_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine c_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function c_oacc_get_size
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 7fd2a441..84441c8a 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
real(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
real(psb_dpk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
real(psb_dpk_), intent(in) :: alpha
real(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
mx = dzero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = dzero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = dzero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = dzero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
real(psb_dpk_), intent(inout) :: y(:)
real(psb_dpk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine d_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine d_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_d_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine d_oacc_new_buffer
subroutine d_oacc_sync_dev_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine d_oacc_sync_dev_space
subroutine d_oacc_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call d_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine d_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free(info)
end subroutine d_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine d_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function d_oacc_get_size
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 455453a1..42cdc18c 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -70,6 +70,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -97,8 +98,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -142,8 +143,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -185,6 +186,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -210,13 +212,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine i_oacc_gthzbuf
@@ -254,13 +256,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine i_oacc_gthzv_x
@@ -287,7 +289,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -411,7 +413,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -449,26 +451,36 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_i_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine i_oacc_new_buffer
subroutine i_oacc_sync_dev_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine i_oacc_sync_dev_space
subroutine i_oacc_sync(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call i_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine i_oacc_sync
@@ -541,6 +553,8 @@ contains
type(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -553,8 +567,9 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free(info)
end subroutine i_oacc_vect_free
@@ -564,8 +579,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine i_oacc_vect_maybe_free_buffer
@@ -573,7 +590,7 @@ contains
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free_buffer(info)
@@ -585,7 +602,6 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function i_oacc_get_size
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index d35e9141..60cdee35 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -72,6 +72,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -99,8 +100,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -144,8 +145,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -187,6 +188,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -212,13 +214,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine l_oacc_gthzbuf
@@ -256,13 +258,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine l_oacc_gthzv_x
@@ -289,7 +291,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -413,7 +415,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -451,26 +453,36 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_l_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine l_oacc_new_buffer
subroutine l_oacc_sync_dev_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine l_oacc_sync_dev_space
subroutine l_oacc_sync(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call l_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine l_oacc_sync
@@ -543,6 +555,8 @@ contains
type(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -555,8 +569,9 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free(info)
end subroutine l_oacc_vect_free
@@ -566,8 +581,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine l_oacc_vect_maybe_free_buffer
@@ -575,7 +592,7 @@ contains
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free_buffer(info)
@@ -587,7 +604,6 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function l_oacc_get_size
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 87eeccea..70c9dd49 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
real(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
real(psb_spk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
real(psb_spk_), intent(in) :: alpha
real(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
mx = szero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = szero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = szero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = szero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
real(psb_spk_), intent(inout) :: y(:)
real(psb_spk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine s_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine s_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_s_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine s_oacc_new_buffer
subroutine s_oacc_sync_dev_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine s_oacc_sync_dev_space
subroutine s_oacc_sync(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call s_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine s_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free(info)
end subroutine s_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine s_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function s_oacc_get_size
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 0fe1adaa..0bc10283 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
complex(psb_dpk_), intent(in) :: alpha
complex(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
mx = dzero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = dzero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = dzero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = dzero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
complex(psb_dpk_), intent(inout) :: y(:)
complex(psb_dpk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine z_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine z_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_z_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine z_oacc_new_buffer
subroutine z_oacc_sync_dev_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine z_oacc_sync_dev_space
subroutine z_oacc_sync(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call z_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine z_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free(info)
end subroutine z_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine z_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function z_oacc_get_size
From ee56c6be3c0fd99efb30c9b13c9875deb63e8bf6 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 8 Oct 2024 11:47:37 +0200
Subject: [PATCH 47/86] Cosmetic changes to OpenACC vectors
---
base/modules/serial/psb_c_vect_mod.F90 | 1 -
base/modules/serial/psb_d_vect_mod.F90 | 1 -
base/modules/serial/psb_s_vect_mod.F90 | 1 -
base/modules/serial/psb_z_vect_mod.F90 | 1 -
4 files changed, 4 deletions(-)
diff --git a/base/modules/serial/psb_c_vect_mod.F90 b/base/modules/serial/psb_c_vect_mod.F90
index 1e9510f2..a0a34621 100644
--- a/base/modules/serial/psb_c_vect_mod.F90
+++ b/base/modules/serial/psb_c_vect_mod.F90
@@ -104,7 +104,6 @@ module psb_c_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => c_vect_upd_xyz
procedure, pass(z) :: xyzw => c_vect_xyzw
-
procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2
diff --git a/base/modules/serial/psb_d_vect_mod.F90 b/base/modules/serial/psb_d_vect_mod.F90
index ae3062dd..acdce5fd 100644
--- a/base/modules/serial/psb_d_vect_mod.F90
+++ b/base/modules/serial/psb_d_vect_mod.F90
@@ -104,7 +104,6 @@ module psb_d_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => d_vect_upd_xyz
procedure, pass(z) :: xyzw => d_vect_xyzw
-
procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2
diff --git a/base/modules/serial/psb_s_vect_mod.F90 b/base/modules/serial/psb_s_vect_mod.F90
index cad4659c..aeccae4d 100644
--- a/base/modules/serial/psb_s_vect_mod.F90
+++ b/base/modules/serial/psb_s_vect_mod.F90
@@ -104,7 +104,6 @@ module psb_s_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => s_vect_upd_xyz
procedure, pass(z) :: xyzw => s_vect_xyzw
-
procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2
diff --git a/base/modules/serial/psb_z_vect_mod.F90 b/base/modules/serial/psb_z_vect_mod.F90
index 48f2e947..484d6423 100644
--- a/base/modules/serial/psb_z_vect_mod.F90
+++ b/base/modules/serial/psb_z_vect_mod.F90
@@ -104,7 +104,6 @@ module psb_z_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => z_vect_upd_xyz
procedure, pass(z) :: xyzw => z_vect_xyzw
-
procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2
From c74be820ea1ef6556168359f445db8a1f274d038 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 8 Oct 2024 11:48:15 +0200
Subject: [PATCH 48/86] Rework configry for CUDA
---
Make.inc.in | 5 ++++-
configure | 28 ++++++++++++++++++++--------
configure.ac | 27 ++++++++++++++++++---------
cuda/Makefile | 9 +++++++++
cuda/impl/Makefile | 9 +++++++++
cuda/spgpu/kernels/sdot.cu | 1 -
test/cudakern/Makefile | 5 +++++
7 files changed, 65 insertions(+), 19 deletions(-)
diff --git a/Make.inc.in b/Make.inc.in
index 38c8ef86..a62abd50 100755
--- a/Make.inc.in
+++ b/Make.inc.in
@@ -80,11 +80,14 @@ LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@
-CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
+CUDA_DEFINES=@CUDA_DEFINES@
+FCUDEFINES=@FCUDEFINES@
+CCUDEFINES=@CCUDEFINES@
+CXXCUDEFINES=@CXXCUDEFINES@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
diff --git a/configure b/configure
index dbc419fe..de173d6e 100755
--- a/configure
+++ b/configure
@@ -668,6 +668,9 @@ CUDA_SHORT_VERSION
CUDA_VERSION
CUDA_LIBS
CUDA_INCLUDES
+CXXCUDEFINES
+CCUDEFINES
+FCUDEFINES
CUDA_DEFINES
CUDA_DIR
EXTRALDLIBS
@@ -10856,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
+ HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
- HAVE_CUDA="yes";
+ CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
@@ -10879,7 +10883,7 @@ fi
if test "x$pac_cv_cudacc" == "x"; then
- pac_cv_cudacc="50,60,70,75";
+ pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
@@ -10891,9 +10895,10 @@ fi
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
- CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
- FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
- CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+ CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
+ FCUDEFINES=" ${CUDA_DEFINES}"
+ CCUDEFINES=" ${CUDA_DEFINES}"
+ CXXCUDEFINES=" ${CUDA_DEFINES}"
fi
fi
@@ -10907,8 +10912,12 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDAD="";
CUDALD="";
CUDEFINES="";
+ CUDA_DEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
+ FCUDEFINES="";
+ CCUDEFINES="";
+ CXXCUDEFINES="";
fi
fi
@@ -11373,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a
PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
-CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
-CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
-FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+CXXDEFINES=$(PSBCXXDEFINES)
+CDEFINES=$(PSBCDEFINES)
+FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
@@ -11420,6 +11429,9 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+
+
+
diff --git a/configure.ac b/configure.ac
index 5966801c..c670f6c8 100755
--- a/configure.ac
+++ b/configure.ac
@@ -803,10 +803,11 @@ PAC_CHECK_CUDA()
if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_CHECK_CUDA_VERSION()
- CUDA_VERSION="$pac_cv_cuda_version";
- CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
dnl PAC_CHECK_SPGPU()
HAVE_CUDA="yes";
+ CUDA_VERSION="$pac_cv_cuda_version";
+ CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
+ CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
@@ -816,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_ARG_WITH_CUDACC()
if test "x$pac_cv_cudacc" == "x"; then
- pac_cv_cudacc="50,60,70,75";
+ pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
@@ -828,9 +829,10 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
- CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
- FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
- CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+ CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
+ FCUDEFINES=" ${CUDA_DEFINES}"
+ CCUDEFINES=" ${CUDA_DEFINES}"
+ CXXCUDEFINES=" ${CUDA_DEFINES}"
fi
fi
@@ -843,8 +845,12 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDAD="";
CUDALD="";
CUDEFINES="";
+ CUDA_DEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
+ FCUDEFINES="";
+ CCUDEFINES="";
+ CXXCUDEFINES="";
fi
fi
@@ -970,9 +976,9 @@ AC_SUBST(FINCLUDES)
PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
-CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
-CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
-FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+CXXDEFINES=$(PSBCXXDEFINES)
+CDEFINES=$(PSBCDEFINES)
+FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
@@ -1008,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
AC_SUBST(EXTRALDLIBS)
AC_SUBST(CUDA_DIR)
AC_SUBST(CUDA_DEFINES)
+AC_SUBST(FCUDEFINES)
+AC_SUBST(CCUDEFINES)
+AC_SUBST(CXXCUDEFINES)
AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_VERSION)
diff --git a/cuda/Makefile b/cuda/Makefile
index 7e428629..a6757fe7 100755
--- a/cuda/Makefile
+++ b/cuda/Makefile
@@ -141,3 +141,12 @@ spgpuclean:
$(MAKE) -C spgpu clean
veryclean: clean
+
+.c.o:
+ $(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+.f90.o:
+ $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
+.F90.o:
+ $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+.cpp.o:
+ $(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
diff --git a/cuda/impl/Makefile b/cuda/impl/Makefile
index 12bf0747..9ceb4575 100755
--- a/cuda/impl/Makefile
+++ b/cuda/impl/Makefile
@@ -295,3 +295,12 @@ lib: objs
clean:
/bin/rm -f $(OBJS)
+
+.c.o:
+ $(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+.f90.o:
+ $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
+.F90.o:
+ $(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+.cpp.o:
+ $(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
diff --git a/cuda/spgpu/kernels/sdot.cu b/cuda/spgpu/kernels/sdot.cu
index c19c7710..f91fab43 100644
--- a/cuda/spgpu/kernels/sdot.cu
+++ b/cuda/spgpu/kernels/sdot.cu
@@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
{
#endif
-
#ifdef ASSUME_LOCK_SYNC_PARALLELISM
volatile float* vsSum = sSum;
vsSum[threadIdx.x] = res;
diff --git a/test/cudakern/Makefile b/test/cudakern/Makefile
index 41cef197..4b8091d5 100755
--- a/test/cudakern/Makefile
+++ b/test/cudakern/Makefile
@@ -41,3 +41,8 @@ lib:
(cd ../../; make library)
verycleanlib:
(cd ../../; make veryclean)
+
+%.o: %.F90
+ $(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@
+%.o: %.f90
+ $(FC) $(FFLAGS) $(FINCLUDES) $(FCUDEFINES) -c $< -o $@
From 949499265eecdaa813dfc278fb7331e91f192ba8 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 8 Oct 2024 11:48:48 +0200
Subject: [PATCH 49/86] Simplify clean_zeros
---
base/serial/impl/psb_c_coo_impl.F90 | 20 ++++----------------
base/serial/impl/psb_c_csc_impl.F90 | 20 ++++----------------
base/serial/impl/psb_c_csr_impl.F90 | 20 ++++----------------
base/serial/impl/psb_d_coo_impl.F90 | 20 ++++----------------
base/serial/impl/psb_d_csc_impl.F90 | 20 ++++----------------
base/serial/impl/psb_d_csr_impl.F90 | 20 ++++----------------
base/serial/impl/psb_s_coo_impl.F90 | 20 ++++----------------
base/serial/impl/psb_s_csc_impl.F90 | 20 ++++----------------
base/serial/impl/psb_s_csr_impl.F90 | 20 ++++----------------
base/serial/impl/psb_z_coo_impl.F90 | 20 ++++----------------
base/serial/impl/psb_z_csc_impl.F90 | 20 ++++----------------
base/serial/impl/psb_z_csr_impl.F90 | 20 ++++----------------
12 files changed, 48 insertions(+), 192 deletions(-)
diff --git a/base/serial/impl/psb_c_coo_impl.F90 b/base/serial/impl/psb_c_coo_impl.F90
index 5b681d22..a56a79a4 100644
--- a/base/serial/impl/psb_c_coo_impl.F90
+++ b/base/serial/impl/psb_c_coo_impl.F90
@@ -595,19 +595,13 @@ subroutine psb_c_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= czero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@@ -5933,19 +5927,13 @@ subroutine psb_lc_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= czero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_c_csc_impl.F90 b/base/serial/impl/psb_c_csc_impl.F90
index 190a4d5b..7916d954 100644
--- a/base/serial/impl/psb_c_csc_impl.F90
+++ b/base/serial/impl/psb_c_csc_impl.F90
@@ -2412,7 +2412,6 @@ subroutine psb_c_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -2422,13 +2421,8 @@ subroutine psb_c_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= czero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= czero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@@ -4320,7 +4314,6 @@ subroutine psb_lc_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -4330,13 +4323,8 @@ subroutine psb_lc_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= czero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= czero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_c_csr_impl.F90 b/base/serial/impl/psb_c_csr_impl.F90
index 0db9f3fa..6a31e522 100644
--- a/base/serial/impl/psb_c_csr_impl.F90
+++ b/base/serial/impl/psb_c_csr_impl.F90
@@ -3633,7 +3633,6 @@ subroutine psb_c_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -3643,13 +3642,8 @@ subroutine psb_c_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= czero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= czero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@@ -6559,7 +6553,6 @@ subroutine psb_lc_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -6569,13 +6562,8 @@ subroutine psb_lc_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= czero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= czero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_d_coo_impl.F90 b/base/serial/impl/psb_d_coo_impl.F90
index a802775a..e3e7b42c 100644
--- a/base/serial/impl/psb_d_coo_impl.F90
+++ b/base/serial/impl/psb_d_coo_impl.F90
@@ -595,19 +595,13 @@ subroutine psb_d_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= dzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@@ -5933,19 +5927,13 @@ subroutine psb_ld_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= dzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_d_csc_impl.F90 b/base/serial/impl/psb_d_csc_impl.F90
index 61e2ad14..886add04 100644
--- a/base/serial/impl/psb_d_csc_impl.F90
+++ b/base/serial/impl/psb_d_csc_impl.F90
@@ -2412,7 +2412,6 @@ subroutine psb_d_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -2422,13 +2421,8 @@ subroutine psb_d_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= dzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@@ -4320,7 +4314,6 @@ subroutine psb_ld_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -4330,13 +4323,8 @@ subroutine psb_ld_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= dzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_d_csr_impl.F90 b/base/serial/impl/psb_d_csr_impl.F90
index 56ba8c63..f5891870 100644
--- a/base/serial/impl/psb_d_csr_impl.F90
+++ b/base/serial/impl/psb_d_csr_impl.F90
@@ -3633,7 +3633,6 @@ subroutine psb_d_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -3643,13 +3642,8 @@ subroutine psb_d_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= dzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@@ -6559,7 +6553,6 @@ subroutine psb_ld_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -6569,13 +6562,8 @@ subroutine psb_ld_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= dzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_s_coo_impl.F90 b/base/serial/impl/psb_s_coo_impl.F90
index 0979ff85..023cde51 100644
--- a/base/serial/impl/psb_s_coo_impl.F90
+++ b/base/serial/impl/psb_s_coo_impl.F90
@@ -595,19 +595,13 @@ subroutine psb_s_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= szero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@@ -5933,19 +5927,13 @@ subroutine psb_ls_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= szero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_s_csc_impl.F90 b/base/serial/impl/psb_s_csc_impl.F90
index ca41d705..3bb47d95 100644
--- a/base/serial/impl/psb_s_csc_impl.F90
+++ b/base/serial/impl/psb_s_csc_impl.F90
@@ -2412,7 +2412,6 @@ subroutine psb_s_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -2422,13 +2421,8 @@ subroutine psb_s_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= szero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= szero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@@ -4320,7 +4314,6 @@ subroutine psb_ls_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -4330,13 +4323,8 @@ subroutine psb_ls_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= szero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= szero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_s_csr_impl.F90 b/base/serial/impl/psb_s_csr_impl.F90
index 323601ca..0a166b0c 100644
--- a/base/serial/impl/psb_s_csr_impl.F90
+++ b/base/serial/impl/psb_s_csr_impl.F90
@@ -3633,7 +3633,6 @@ subroutine psb_s_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -3643,13 +3642,8 @@ subroutine psb_s_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= szero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= szero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@@ -6559,7 +6553,6 @@ subroutine psb_ls_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -6569,13 +6562,8 @@ subroutine psb_ls_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= szero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= szero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
diff --git a/base/serial/impl/psb_z_coo_impl.F90 b/base/serial/impl/psb_z_coo_impl.F90
index 74c3f2cb..7dfceb06 100644
--- a/base/serial/impl/psb_z_coo_impl.F90
+++ b/base/serial/impl/psb_z_coo_impl.F90
@@ -595,19 +595,13 @@ subroutine psb_z_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_ipk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= zzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@@ -5933,19 +5927,13 @@ subroutine psb_lz_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info
!
integer(psb_lpk_) :: i,j,k, nzin
- logical :: cpy
info = 0
nzin = a%get_nzeros()
j = 0
do i=1, nzin
- if (a%val(i) /= zzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (a%ia(i) == a%ja(i))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
diff --git a/base/serial/impl/psb_z_csc_impl.F90 b/base/serial/impl/psb_z_csc_impl.F90
index 7ceff47f..32be36af 100644
--- a/base/serial/impl/psb_z_csc_impl.F90
+++ b/base/serial/impl/psb_z_csc_impl.F90
@@ -2412,7 +2412,6 @@ subroutine psb_z_csc_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nc
integer(psb_ipk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -2422,13 +2421,8 @@ subroutine psb_z_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= zzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@@ -4320,7 +4314,6 @@ subroutine psb_lz_csc_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -4330,13 +4323,8 @@ subroutine psb_lz_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
- if (a%val(k) /= zzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ia(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
diff --git a/base/serial/impl/psb_z_csr_impl.F90 b/base/serial/impl/psb_z_csr_impl.F90
index 54659def..e2ddf0d7 100644
--- a/base/serial/impl/psb_z_csr_impl.F90
+++ b/base/serial/impl/psb_z_csr_impl.F90
@@ -3633,7 +3633,6 @@ subroutine psb_z_csr_clean_zeros(a, info)
!
integer(psb_ipk_) :: i, j, k, nr
integer(psb_ipk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -3643,13 +3642,8 @@ subroutine psb_z_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= zzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@@ -6559,7 +6553,6 @@ subroutine psb_lz_csr_clean_zeros(a, info)
!
integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:)
- logical :: cpy
info = 0
call a%sync()
@@ -6569,13 +6562,8 @@ subroutine psb_lz_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
- if (a%val(k) /= zzero) then
- cpy = .true.
- else
- ! Always keep the diagonal, even if numerically zero
- cpy = (i == a%ja(k))
- end if
- if (cpy) then
+ ! Always keep the diagonal, even if numerically zero
+ if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
From 3d9fee2dd7f65b0bcf72c0dce46b9908a5502bd1 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 8 Oct 2024 17:07:10 +0200
Subject: [PATCH 50/86] Fix DOT on CUDA vectors.
---
cuda/dvectordev.c | 3 ++-
cuda/psb_c_cuda_vect_mod.F90 | 22 +++-------------------
cuda/psb_d_cuda_vect_mod.F90 | 18 +-----------------
cuda/psb_i_cuda_vect_mod.F90 | 12 ------------
cuda/psb_s_cuda_vect_mod.F90 | 22 +++-------------------
cuda/psb_z_cuda_vect_mod.F90 | 22 +++-------------------
cuda/svectordev.c | 3 ++-
cuda/zvectordev.c | 3 ++-
8 files changed, 16 insertions(+), 89 deletions(-)
diff --git a/cuda/dvectordev.c b/cuda/dvectordev.c
index a69e1b71..d4f5513b 100644
--- a/cuda/dvectordev.c
+++ b/cuda/dvectordev.c
@@ -220,7 +220,8 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
- spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+ spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,
+ devVecA->count_,devVecB->pitch_);
return(i);
}
diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90
index 45fafe0a..9755b386 100644
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine c_cuda_set_scal
-!!$
-!!$ subroutine c_cuda_set_vect(x,val)
-!!$ class(psb_c_vect_cuda), intent(inout) :: x
-!!$ complex(psb_spk_), intent(in) :: val(:)
-!!$ integer(psb_ipk_) :: nr
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ call x%psb_c_base_vect_type%set_vect(val)
-!!$ call x%set_host()
-!!$
-!!$ end subroutine c_cuda_set_vect
@@ -834,7 +822,6 @@ contains
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
- complex(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = czero
@@ -844,9 +831,6 @@ contains
! TYPE psb_c_vect
!
select type(yy => y)
- type is (psb_c_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n,x%v,1,yy%v,1)
type is (psb_c_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
- call x%sync()
+ if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@@ -870,10 +854,10 @@ contains
complex(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
- complex(psb_spk_), external :: ddot
+ complex(psb_spk_), external :: cdot
if (x%is_dev()) call x%sync()
- res = ddot(n,y,1,x%v,1)
+ res = cdot(n,y,1,x%v,1)
end function c_cuda_dot_a
diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90
index e7e563ff..dfa83c60 100644
--- a/cuda/psb_d_cuda_vect_mod.F90
+++ b/cuda/psb_d_cuda_vect_mod.F90
@@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine d_cuda_set_scal
-!!$
-!!$ subroutine d_cuda_set_vect(x,val)
-!!$ class(psb_d_vect_cuda), intent(inout) :: x
-!!$ real(psb_dpk_), intent(in) :: val(:)
-!!$ integer(psb_ipk_) :: nr
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ call x%psb_d_base_vect_type%set_vect(val)
-!!$ call x%set_host()
-!!$
-!!$ end subroutine d_cuda_set_vect
@@ -834,7 +822,6 @@ contains
class(psb_d_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
- real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = dzero
@@ -844,9 +831,6 @@ contains
! TYPE psb_d_vect
!
select type(yy => y)
- type is (psb_d_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n,x%v,1,yy%v,1)
type is (psb_d_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
- call x%sync()
+ if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90
index 461d84d1..4be4679c 100644
--- a/cuda/psb_i_cuda_vect_mod.F90
+++ b/cuda/psb_i_cuda_vect_mod.F90
@@ -795,18 +795,6 @@ contains
call x%set_dev()
end subroutine i_cuda_set_scal
-!!$
-!!$ subroutine i_cuda_set_vect(x,val)
-!!$ class(psb_i_vect_cuda), intent(inout) :: x
-!!$ integer(psb_ipk_), intent(in) :: val(:)
-!!$ integer(psb_ipk_) :: nr
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ call x%psb_i_base_vect_type%set_vect(val)
-!!$ call x%set_host()
-!!$
-!!$ end subroutine i_cuda_set_vect
diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90
index a2c69934..39a108ab 100644
--- a/cuda/psb_s_cuda_vect_mod.F90
+++ b/cuda/psb_s_cuda_vect_mod.F90
@@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine s_cuda_set_scal
-!!$
-!!$ subroutine s_cuda_set_vect(x,val)
-!!$ class(psb_s_vect_cuda), intent(inout) :: x
-!!$ real(psb_spk_), intent(in) :: val(:)
-!!$ integer(psb_ipk_) :: nr
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ call x%psb_s_base_vect_type%set_vect(val)
-!!$ call x%set_host()
-!!$
-!!$ end subroutine s_cuda_set_vect
@@ -834,7 +822,6 @@ contains
class(psb_s_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
- real(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = szero
@@ -844,9 +831,6 @@ contains
! TYPE psb_s_vect
!
select type(yy => y)
- type is (psb_s_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n,x%v,1,yy%v,1)
type is (psb_s_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
- call x%sync()
+ if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@@ -870,10 +854,10 @@ contains
real(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
- real(psb_spk_), external :: ddot
+ real(psb_spk_), external :: sdot
if (x%is_dev()) call x%sync()
- res = ddot(n,y,1,x%v,1)
+ res = sdot(n,y,1,x%v,1)
end function s_cuda_dot_a
diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90
index dfeafa6e..d4318bea 100644
--- a/cuda/psb_z_cuda_vect_mod.F90
+++ b/cuda/psb_z_cuda_vect_mod.F90
@@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine z_cuda_set_scal
-!!$
-!!$ subroutine z_cuda_set_vect(x,val)
-!!$ class(psb_z_vect_cuda), intent(inout) :: x
-!!$ complex(psb_dpk_), intent(in) :: val(:)
-!!$ integer(psb_ipk_) :: nr
-!!$ integer(psb_ipk_) :: info
-!!$
-!!$ if (x%is_dev()) call x%sync()
-!!$ call x%psb_z_base_vect_type%set_vect(val)
-!!$ call x%set_host()
-!!$
-!!$ end subroutine z_cuda_set_vect
@@ -834,7 +822,6 @@ contains
class(psb_z_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
- complex(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = zzero
@@ -844,9 +831,6 @@ contains
! TYPE psb_z_vect
!
select type(yy => y)
- type is (psb_z_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n,x%v,1,yy%v,1)
type is (psb_z_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
- call x%sync()
+ if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@@ -870,10 +854,10 @@ contains
complex(psb_dpk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
- complex(psb_dpk_), external :: ddot
+ complex(psb_dpk_), external :: zdot
if (x%is_dev()) call x%sync()
- res = ddot(n,y,1,x%v,1)
+ res = zdot(n,y,1,x%v,1)
end function z_cuda_dot_a
diff --git a/cuda/svectordev.c b/cuda/svectordev.c
index cfaef5ce..ab4dd01b 100644
--- a/cuda/svectordev.c
+++ b/cuda/svectordev.c
@@ -220,7 +220,8 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
- spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+ spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,
+ devVecA->count_,devVecB->pitch_);
return(i);
}
diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c
index d7d88f1b..3a5b0738 100644
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@@ -223,7 +223,8 @@ int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
return(i);
}
-int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB)
+int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
+ void* devMultiVecA, void* devMultiVecB)
{int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
From 49469ce021df8e9c15f23d14a6d1929801b6d34d Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 8 Oct 2024 17:36:44 +0200
Subject: [PATCH 51/86] Various changes into openacc
---
openacc/psb_c_oacc_csr_mat_mod.F90 | 6 +--
openacc/psb_c_oacc_ell_mat_mod.F90 | 8 +--
openacc/psb_c_oacc_hll_mat_mod.F90 | 10 ++--
openacc/psb_c_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------
openacc/psb_d_oacc_csr_mat_mod.F90 | 6 +--
openacc/psb_d_oacc_ell_mat_mod.F90 | 8 +--
openacc/psb_d_oacc_hll_mat_mod.F90 | 10 ++--
openacc/psb_d_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------
openacc/psb_i_oacc_vect_mod.F90 | 58 ++++++++++++++--------
openacc/psb_l_oacc_vect_mod.F90 | 58 ++++++++++++++--------
openacc/psb_s_oacc_csr_mat_mod.F90 | 6 +--
openacc/psb_s_oacc_ell_mat_mod.F90 | 8 +--
openacc/psb_s_oacc_hll_mat_mod.F90 | 10 ++--
openacc/psb_s_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------
openacc/psb_z_oacc_csr_mat_mod.F90 | 6 +--
openacc/psb_z_oacc_ell_mat_mod.F90 | 8 +--
openacc/psb_z_oacc_hll_mat_mod.F90 | 10 ++--
openacc/psb_z_oacc_vect_mod.F90 | 80 ++++++++++++++++++------------
18 files changed, 314 insertions(+), 218 deletions(-)
diff --git a/openacc/psb_c_oacc_csr_mat_mod.F90 b/openacc/psb_c_oacc_csr_mat_mod.F90
index 94edc5e9..c6461fe3 100644
--- a/openacc/psb_c_oacc_csr_mat_mod.F90
+++ b/openacc/psb_c_oacc_csr_mat_mod.F90
@@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irp)>0) call acc_create(a%irp)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine c_oacc_csr_sync_dev_space
subroutine c_oacc_csr_sync(a)
diff --git a/openacc/psb_c_oacc_ell_mat_mod.F90 b/openacc/psb_c_oacc_ell_mat_mod.F90
index f0b9779b..b2168646 100644
--- a/openacc/psb_c_oacc_ell_mat_mod.F90
+++ b/openacc/psb_c_oacc_ell_mat_mod.F90
@@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine c_oacc_ell_sync_dev_space
function c_oacc_ell_is_host(a) result(res)
diff --git a/openacc/psb_c_oacc_hll_mat_mod.F90 b/openacc/psb_c_oacc_hll_mat_mod.F90
index 98c6a2ee..f8c19275 100644
--- a/openacc/psb_c_oacc_hll_mat_mod.F90
+++ b/openacc/psb_c_oacc_hll_mat_mod.F90
@@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
- if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine c_oacc_hll_sync_dev_space
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index e479f58d..40437184 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
complex(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
complex(psb_spk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
complex(psb_spk_), intent(in) :: alpha
complex(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
mx = szero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = szero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = szero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = szero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
complex(psb_spk_), intent(inout) :: y(:)
complex(psb_spk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine c_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine c_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_c_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine c_oacc_new_buffer
subroutine c_oacc_sync_dev_space(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine c_oacc_sync_dev_space
subroutine c_oacc_sync(x)
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call c_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine c_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free(info)
end subroutine c_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine c_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_c_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_c_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function c_oacc_get_size
diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90
index a3119b64..21907312 100644
--- a/openacc/psb_d_oacc_csr_mat_mod.F90
+++ b/openacc/psb_d_oacc_csr_mat_mod.F90
@@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irp)>0) call acc_create(a%irp)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine d_oacc_csr_sync_dev_space
subroutine d_oacc_csr_sync(a)
diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90
index 3932e286..021face3 100644
--- a/openacc/psb_d_oacc_ell_mat_mod.F90
+++ b/openacc/psb_d_oacc_ell_mat_mod.F90
@@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine d_oacc_ell_sync_dev_space
function d_oacc_ell_is_host(a) result(res)
diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90
index fd0fe54f..264bbcce 100644
--- a/openacc/psb_d_oacc_hll_mat_mod.F90
+++ b/openacc/psb_d_oacc_hll_mat_mod.F90
@@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
- if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine d_oacc_hll_sync_dev_space
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 7fd2a441..84441c8a 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
real(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
real(psb_dpk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
real(psb_dpk_), intent(in) :: alpha
real(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
mx = dzero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = dzero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = dzero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = dzero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
real(psb_dpk_), intent(inout) :: y(:)
real(psb_dpk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine d_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine d_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_d_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine d_oacc_new_buffer
subroutine d_oacc_sync_dev_space(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine d_oacc_sync_dev_space
subroutine d_oacc_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call d_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine d_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free(info)
end subroutine d_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine d_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_d_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function d_oacc_get_size
diff --git a/openacc/psb_i_oacc_vect_mod.F90 b/openacc/psb_i_oacc_vect_mod.F90
index 455453a1..42cdc18c 100644
--- a/openacc/psb_i_oacc_vect_mod.F90
+++ b/openacc/psb_i_oacc_vect_mod.F90
@@ -70,6 +70,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -97,8 +98,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -142,8 +143,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -185,6 +186,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -210,13 +212,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine i_oacc_gthzbuf
@@ -254,13 +256,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_ipk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine i_oacc_gthzv_x
@@ -287,7 +289,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -411,7 +413,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -449,26 +451,36 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_i_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine i_oacc_new_buffer
subroutine i_oacc_sync_dev_space(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine i_oacc_sync_dev_space
subroutine i_oacc_sync(x)
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call i_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine i_oacc_sync
@@ -541,6 +553,8 @@ contains
type(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -553,8 +567,9 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free(info)
end subroutine i_oacc_vect_free
@@ -564,8 +579,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine i_oacc_vect_maybe_free_buffer
@@ -573,7 +590,7 @@ contains
implicit none
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_i_base_vect_type%free_buffer(info)
@@ -585,7 +602,6 @@ contains
class(psb_i_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function i_oacc_get_size
diff --git a/openacc/psb_l_oacc_vect_mod.F90 b/openacc/psb_l_oacc_vect_mod.F90
index d35e9141..60cdee35 100644
--- a/openacc/psb_l_oacc_vect_mod.F90
+++ b/openacc/psb_l_oacc_vect_mod.F90
@@ -72,6 +72,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -99,8 +100,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -144,8 +145,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -187,6 +188,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -212,13 +214,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine l_oacc_gthzbuf
@@ -256,13 +258,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
integer(psb_lpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine l_oacc_gthzv_x
@@ -289,7 +291,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -413,7 +415,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -451,26 +453,36 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_l_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine l_oacc_new_buffer
subroutine l_oacc_sync_dev_space(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine l_oacc_sync_dev_space
subroutine l_oacc_sync(x)
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call l_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine l_oacc_sync
@@ -543,6 +555,8 @@ contains
type(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -555,8 +569,9 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free(info)
end subroutine l_oacc_vect_free
@@ -566,8 +581,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine l_oacc_vect_maybe_free_buffer
@@ -575,7 +592,7 @@ contains
implicit none
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_l_base_vect_type%free_buffer(info)
@@ -587,7 +604,6 @@ contains
class(psb_l_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function l_oacc_get_size
diff --git a/openacc/psb_s_oacc_csr_mat_mod.F90 b/openacc/psb_s_oacc_csr_mat_mod.F90
index 5eaf80f7..d66dca3b 100644
--- a/openacc/psb_s_oacc_csr_mat_mod.F90
+++ b/openacc/psb_s_oacc_csr_mat_mod.F90
@@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irp)>0) call acc_create(a%irp)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine s_oacc_csr_sync_dev_space
subroutine s_oacc_csr_sync(a)
diff --git a/openacc/psb_s_oacc_ell_mat_mod.F90 b/openacc/psb_s_oacc_ell_mat_mod.F90
index 56775879..600a08a7 100644
--- a/openacc/psb_s_oacc_ell_mat_mod.F90
+++ b/openacc/psb_s_oacc_ell_mat_mod.F90
@@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine s_oacc_ell_sync_dev_space
function s_oacc_ell_is_host(a) result(res)
diff --git a/openacc/psb_s_oacc_hll_mat_mod.F90 b/openacc/psb_s_oacc_hll_mat_mod.F90
index 997433a1..33033248 100644
--- a/openacc/psb_s_oacc_hll_mat_mod.F90
+++ b/openacc/psb_s_oacc_hll_mat_mod.F90
@@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
- if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine s_oacc_hll_sync_dev_space
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 87eeccea..70c9dd49 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
real(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
real(psb_spk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
real(psb_spk_), intent(in) :: alpha
real(psb_spk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_spk_) :: sum, mx
integer(psb_ipk_) :: i
mx = szero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = szero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_spk_) :: max_val
integer(psb_ipk_) :: i
max_val = szero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_spk_) :: res
integer(psb_ipk_) :: i
res = szero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
real(psb_spk_), intent(inout) :: y(:)
real(psb_spk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine s_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
real(psb_spk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine s_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_s_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine s_oacc_new_buffer
subroutine s_oacc_sync_dev_space(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine s_oacc_sync_dev_space
subroutine s_oacc_sync(x)
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call s_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine s_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free(info)
end subroutine s_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine s_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_s_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_s_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function s_oacc_get_size
diff --git a/openacc/psb_z_oacc_csr_mat_mod.F90 b/openacc/psb_z_oacc_csr_mat_mod.F90
index ed92373d..3b66787a 100644
--- a/openacc/psb_z_oacc_csr_mat_mod.F90
+++ b/openacc/psb_z_oacc_csr_mat_mod.F90
@@ -257,9 +257,9 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irp)>0) call acc_create(a%irp)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irp)>0) call acc_copyin(a%irp)
end subroutine z_oacc_csr_sync_dev_space
subroutine z_oacc_csr_sync(a)
diff --git a/openacc/psb_z_oacc_ell_mat_mod.F90 b/openacc/psb_z_oacc_ell_mat_mod.F90
index d494922f..abfb11e3 100644
--- a/openacc/psb_z_oacc_ell_mat_mod.F90
+++ b/openacc/psb_z_oacc_ell_mat_mod.F90
@@ -186,10 +186,10 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
end subroutine z_oacc_ell_sync_dev_space
function z_oacc_ell_is_host(a) result(res)
diff --git a/openacc/psb_z_oacc_hll_mat_mod.F90 b/openacc/psb_z_oacc_hll_mat_mod.F90
index 07739348..4c9f1b11 100644
--- a/openacc/psb_z_oacc_hll_mat_mod.F90
+++ b/openacc/psb_z_oacc_hll_mat_mod.F90
@@ -240,11 +240,11 @@ contains
! but with size 0, then CREATE,UPDATE and DELETE
! will fail
!
- if (psb_size(a%val)>0) call acc_create(a%val)
- if (psb_size(a%ja)>0) call acc_create(a%ja)
- if (psb_size(a%irn)>0) call acc_create(a%irn)
- if (psb_size(a%idiag)>0) call acc_create(a%idiag)
- if (psb_size(a%hkoffs)>0) call acc_create(a%hkoffs)
+ if (psb_size(a%val)>0) call acc_copyin(a%val)
+ if (psb_size(a%ja)>0) call acc_copyin(a%ja)
+ if (psb_size(a%irn)>0) call acc_copyin(a%irn)
+ if (psb_size(a%idiag)>0) call acc_copyin(a%idiag)
+ if (psb_size(a%hkoffs)>0) call acc_copyin(a%hkoffs)
end subroutine z_oacc_hll_sync_dev_space
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 0fe1adaa..0bc10283 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -113,7 +113,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, n
x(i) = abs(x(i))
end do
@@ -144,7 +144,7 @@ contains
complex(psb_dpk_), intent(inout) :: x(:),y(:)
integer(psb_ipk_) :: n
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, n
y(i) = abs(x(i))
end do
@@ -164,7 +164,7 @@ contains
complex(psb_dpk_), intent(in) :: alpha
complex(psb_dpk_), intent(inout) :: x(:)
integer(psb_ipk_) :: i
- !$acc parallel loop
+ !$acc parallel loop present(x)
do i = 1, size(x)
x(i) = alpha * x(i)
end do
@@ -189,7 +189,7 @@ contains
real(psb_dpk_) :: sum, mx
integer(psb_ipk_) :: i
mx = dzero
- !$acc parallel loop reduction(max:mx)
+ !$acc parallel loop reduction(max:mx) present(x)
do i = 1, n
if (abs(x(i)) > mx) mx = abs(x(i))
end do
@@ -197,7 +197,7 @@ contains
res = mx
else
sum = dzero
- !$acc parallel loop reduction(+:sum)
+ !$acc parallel loop reduction(+:sum) present(x)
do i = 1, n
sum = sum + abs(x(i)/mx)**2
end do
@@ -223,7 +223,7 @@ contains
real(psb_dpk_) :: max_val
integer(psb_ipk_) :: i
max_val = dzero
- !$acc parallel loop reduction(max:max_val)
+ !$acc parallel loop reduction(max:max_val) present(x)
do i = 1, n
if (abs(x(i)) > max_val) max_val = abs(x(i))
end do
@@ -248,7 +248,7 @@ contains
real(psb_dpk_) :: res
integer(psb_ipk_) :: i
res = dzero
- !$acc parallel loop reduction(+:res)
+ !$acc parallel loop reduction(+:res) present(x)
do i = 1, n
res = res + abs(x(i))
end do
@@ -265,7 +265,7 @@ contains
info = 0
if (y%is_dev()) call y%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y)
do i = 1, size(x)
y%v(i) = y%v(i) * x(i)
end do
@@ -283,7 +283,7 @@ contains
info = 0
if (z%is_dev()) call z%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x,y,z%v)
do i = 1, size(x)
z%v(i) = alpha * x(i) * y(i) + beta * z%v(i)
end do
@@ -327,7 +327,7 @@ contains
complex(psb_dpk_), intent(inout) :: y(:)
complex(psb_dpk_), intent(in) :: alpha, beta
integer(psb_ipk_), intent(out) :: info
- !$acc parallel
+ !$acc parallel present(x,y)
!$acc loop
do i = 1, m
y(i) = alpha * x(i) + beta * y(i)
@@ -384,7 +384,7 @@ contains
if ((nx < m) .or. (ny < m) .or. (nz < m)) then
info = psb_err_internal_error_
else
- !$acc parallel loop
+ !$acc parallel loop present(xx%v,yy%v,zz%v)
do i = 1, m
yy%v(i) = alpha * xx%v(i) + beta * yy%v(i)
zz%v(i) = gamma * yy%v(i) + delta * zz%v(i)
@@ -416,6 +416,7 @@ contains
integer(psb_ipk_) :: info, k
logical :: acc_done
if (.not.allocated(y%combuf)) then
+ write(0,*) 'allocation error for y%combuf '
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
@@ -443,8 +444,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta,x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -488,8 +489,8 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: beta, x(:), y(:)
integer(psb_ipk_) :: k
- !$acc update device(x(1:n)) async
- !$acc parallel loop
+ !$acc update device(x(1:n))
+ !$acc parallel loop present(x,y)
do k = 1, n
y(idx(k)) = x(k) + beta *y(idx(k))
end do
@@ -531,6 +532,7 @@ contains
acc_done = .false.
if (.not.allocated(x%combuf)) then
+ write(0,*) 'oacc allocation error combuf gthzbuf '
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
@@ -556,13 +558,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine z_oacc_gthzbuf
@@ -600,13 +602,13 @@ contains
integer(psb_ipk_) :: n, idx(:)
complex(psb_dpk_) :: x(:), y(:)
integer(psb_ipk_) :: k
-
- !$acc parallel loop present(y)
+ !
+ !$acc parallel loop present(x,y)
do k = 1, n
y(k) = x(idx(k))
end do
!$acc end parallel loop
- !$acc update self(y(1:n)) async
+ !$acc update self(y(1:n))
end subroutine inner_gth
end subroutine z_oacc_gthzv_x
@@ -633,7 +635,7 @@ contains
if (vval%is_host()) call vval%sync()
if (virl%is_host()) call virl%sync()
if (x%is_host()) call x%sync()
- !$acc parallel loop
+ !$acc parallel loop present(x%v,virl%v,vval%v)
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
@@ -757,7 +759,7 @@ contains
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
- !$acc parallel loop
+ !$acc parallel loop present(x%v)
do i = first_, last_
x%v(i) = val
end do
@@ -849,26 +851,36 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(in) :: n
integer(psb_ipk_), intent(out) :: info
- if (n /= psb_size(x%combuf)) then
+
+ !write(0,*) 'oacc new_buffer',n,psb_size(x%combuf)
+ if (n > psb_size(x%combuf)) then
+ !write(0,*) 'oacc new_buffer: reallocating '
+ if (allocated(x%combuf)) then
+ !if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
+ !$acc exit data delete(x%combuf)
+ end if
call x%psb_z_base_vect_type%new_buffer(n,info)
!$acc enter data copyin(x%combuf)
+ ! call acc_copyin(x%combuf)
end if
end subroutine z_oacc_new_buffer
subroutine z_oacc_sync_dev_space(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
- if (allocated(x%v)) call acc_create(x%v)
+!!$ write(0,*) 'oacc sync_dev_space'
+ if (psb_size(x%v)>0) call acc_copyin(x%v)
end subroutine z_oacc_sync_dev_space
subroutine z_oacc_sync(x)
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
if (x%is_dev()) then
- call acc_update_self(x%v)
+ if (psb_size(x%v)>0) call acc_update_self(x%v)
end if
if (x%is_host()) then
- call acc_update_device(x%v)
+ if (.not.acc_is_present(x%v)) call z_oacc_sync_dev_space(x)
+ if (psb_size(x%v)>0) call acc_update_device(x%v)
end if
call x%set_sync()
end subroutine z_oacc_sync
@@ -941,6 +953,8 @@ contains
type(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: info
info = 0
+!!$ write(0,*) 'oacc final_vect_free'
+ call x%free_buffer(info)
if (allocated(x%v)) then
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
deallocate(x%v, stat=info)
@@ -953,8 +967,9 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
info = 0
+!!$ write(0,*) 'oacc vect_free'
+ call x%free_buffer(info)
if (acc_is_present(x%v)) call acc_delete_finalize(x%v)
- if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free(info)
end subroutine z_oacc_vect_free
@@ -964,8 +979,10 @@ contains
integer(psb_ipk_), intent(out) :: info
info = 0
- if (psb_oacc_get_maybe_free_buffer())&
- & call x%free_buffer(info)
+ if (psb_oacc_get_maybe_free_buffer()) then
+ !write(0,*) 'psb_oacc_get_maybe_free_buffer() ',psb_oacc_get_maybe_free_buffer()
+ call x%free_buffer(info)
+ end if
end subroutine z_oacc_vect_maybe_free_buffer
@@ -973,7 +990,7 @@ contains
implicit none
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_), intent(out) :: info
-
+! write(0,*) 'oacc free_buffer'
info = 0
if (acc_is_present(x%combuf)) call acc_delete_finalize(x%combuf)
call x%psb_z_base_vect_type%free_buffer(info)
@@ -985,7 +1002,6 @@ contains
class(psb_z_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
- if (x%is_dev()) call x%sync()
res = size(x%v)
end function z_oacc_get_size
From 5903c0b272dc1c40d10ada8ee50ef6d94b1b1183 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Tue, 8 Oct 2024 17:41:42 +0200
Subject: [PATCH 52/86] Fix DOT in OpenACC
---
openacc/psb_c_oacc_vect_mod.F90 | 12 ++++--------
openacc/psb_d_oacc_vect_mod.F90 | 8 ++------
openacc/psb_s_oacc_vect_mod.F90 | 12 ++++--------
openacc/psb_z_oacc_vect_mod.F90 | 12 ++++--------
4 files changed, 14 insertions(+), 30 deletions(-)
diff --git a/openacc/psb_c_oacc_vect_mod.F90 b/openacc/psb_c_oacc_vect_mod.F90
index 40437184..067c571b 100644
--- a/openacc/psb_c_oacc_vect_mod.F90
+++ b/openacc/psb_c_oacc_vect_mod.F90
@@ -798,22 +798,18 @@ contains
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
- complex(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = czero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
- type is (psb_c_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_c_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = c_inner_oacc_dot(n, x%v, yy%v)
class default
- call x%sync()
- res = y%dot(n, x%v)
+ if (x%is_dev()) call x%sync()
+ res = y%dot(n, x%v)
end select
contains
function c_inner_oacc_dot(n, x, y) result(res)
@@ -838,10 +834,10 @@ contains
complex(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
- complex(psb_spk_), external :: ddot
+ complex(psb_spk_), external :: cdot
if (x%is_dev()) call x%sync()
- res = ddot(n, y, 1, x%v, 1)
+ res = cdot(n, y, 1, x%v, 1)
end function c_oacc_dot_a
diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90
index 84441c8a..929066ae 100644
--- a/openacc/psb_d_oacc_vect_mod.F90
+++ b/openacc/psb_d_oacc_vect_mod.F90
@@ -798,22 +798,18 @@ contains
class(psb_d_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
- real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = dzero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
- type is (psb_d_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_d_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = d_inner_oacc_dot(n, x%v, yy%v)
class default
- call x%sync()
- res = y%dot(n, x%v)
+ if (x%is_dev()) call x%sync()
+ res = y%dot(n, x%v)
end select
contains
function d_inner_oacc_dot(n, x, y) result(res)
diff --git a/openacc/psb_s_oacc_vect_mod.F90 b/openacc/psb_s_oacc_vect_mod.F90
index 70c9dd49..9cb42a95 100644
--- a/openacc/psb_s_oacc_vect_mod.F90
+++ b/openacc/psb_s_oacc_vect_mod.F90
@@ -798,22 +798,18 @@ contains
class(psb_s_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
- real(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = szero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
- type is (psb_s_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_s_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = s_inner_oacc_dot(n, x%v, yy%v)
class default
- call x%sync()
- res = y%dot(n, x%v)
+ if (x%is_dev()) call x%sync()
+ res = y%dot(n, x%v)
end select
contains
function s_inner_oacc_dot(n, x, y) result(res)
@@ -838,10 +834,10 @@ contains
real(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
- real(psb_spk_), external :: ddot
+ real(psb_spk_), external :: sdot
if (x%is_dev()) call x%sync()
- res = ddot(n, y, 1, x%v, 1)
+ res = sdot(n, y, 1, x%v, 1)
end function s_oacc_dot_a
diff --git a/openacc/psb_z_oacc_vect_mod.F90 b/openacc/psb_z_oacc_vect_mod.F90
index 0bc10283..90ddcf0d 100644
--- a/openacc/psb_z_oacc_vect_mod.F90
+++ b/openacc/psb_z_oacc_vect_mod.F90
@@ -798,22 +798,18 @@ contains
class(psb_z_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
- complex(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = zzero
!!$ write(0,*) 'oacc_dot_v'
select type(yy => y)
- type is (psb_z_base_vect_type)
- if (x%is_dev()) call x%sync()
- res = ddot(n, x%v, 1, yy%v, 1)
type is (psb_z_vect_oacc)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
res = z_inner_oacc_dot(n, x%v, yy%v)
class default
- call x%sync()
- res = y%dot(n, x%v)
+ if (x%is_dev()) call x%sync()
+ res = y%dot(n, x%v)
end select
contains
function z_inner_oacc_dot(n, x, y) result(res)
@@ -838,10 +834,10 @@ contains
complex(psb_dpk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
- complex(psb_dpk_), external :: ddot
+ complex(psb_dpk_), external :: zdot
if (x%is_dev()) call x%sync()
- res = ddot(n, y, 1, x%v, 1)
+ res = zdot(n, y, 1, x%v, 1)
end function z_oacc_dot_a
From 6972c505424888ba89c86cfcf0a001866066fa47 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 9 Oct 2024 10:03:37 +0200
Subject: [PATCH 53/86] Updated readme
---
README.md | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/README.md b/README.md
index e51fe70b..3cacfb93 100644
--- a/README.md
+++ b/README.md
@@ -50,6 +50,15 @@ entities that were previouslty separated:
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
+OpenACC
+-------------------
+There is a highly experimental version of an OpenACC interface,
+you can access it by speficifying
+--enable-openacc --with-extraopenacc="-foffload=nvptx-none=-march=sm_70"
+where the argument to the extraopenacc option depends on the compiler
+you are using (the example shown here is relevant for the GNU
+compiler).
+
INSTALLING
----------
To compile and run our software you will need the following
@@ -135,6 +144,7 @@ Salvatore Filippone
Contributors (roughly reverse cronological order):
+Theophane Loloum
Dimitri Walther
Andea Di Iorio
Stefano Petrilli
From abdf7fc05afc571e13b0c34c17609a16ce79973f Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 9 Oct 2024 13:32:29 +0200
Subject: [PATCH 54/86] Fix constructor name for multivector
---
cuda/psb_c_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_d_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_i_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_s_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_z_cuda_vect_mod.F90 | 8 ++++----
5 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90
index 9755b386..752d2bf1 100644
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_c_cuda_multivect_mod
end type psb_c_multivect_cuda
public :: psb_c_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_c_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
complex(psb_spk_) :: x(:,:)
type(psb_c_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90
index dfa83c60..4e17be02 100644
--- a/cuda/psb_d_cuda_vect_mod.F90
+++ b/cuda/psb_d_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_d_cuda_multivect_mod
end type psb_d_multivect_cuda
public :: psb_d_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_d_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
real(psb_dpk_) :: x(:,:)
type(psb_d_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90
index 4be4679c..eeafe90e 100644
--- a/cuda/psb_i_cuda_vect_mod.F90
+++ b/cuda/psb_i_cuda_vect_mod.F90
@@ -937,14 +937,14 @@ module psb_i_cuda_multivect_mod
end type psb_i_multivect_cuda
public :: psb_i_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_i_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
integer(psb_ipk_) :: x(:,:)
type(psb_i_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -952,7 +952,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90
index 39a108ab..3006ebd8 100644
--- a/cuda/psb_s_cuda_vect_mod.F90
+++ b/cuda/psb_s_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_s_cuda_multivect_mod
end type psb_s_multivect_cuda
public :: psb_s_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_s_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
real(psb_spk_) :: x(:,:)
type(psb_s_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90
index d4318bea..f4860911 100644
--- a/cuda/psb_z_cuda_vect_mod.F90
+++ b/cuda/psb_z_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_z_cuda_multivect_mod
end type psb_z_multivect_cuda
public :: psb_z_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_z_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
complex(psb_dpk_) :: x(:,:)
type(psb_z_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y)
From ade79bcc7e399c20c6cd3cdafa3faf64b76e7521 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Wed, 9 Oct 2024 14:27:17 +0200
Subject: [PATCH 55/86] Fixes for compilation with CUDA
---
cuda/cvectordev.c | 6 +++---
cuda/cvectordev.h | 7 ++++---
cuda/dnsdev.c | 16 ++++++++--------
cuda/dvectordev.h | 1 +
cuda/fcusparse_fct.h | 4 ++--
cuda/ivectordev.h | 1 +
cuda/svectordev.h | 1 +
cuda/vectordev.h | 1 +
cuda/zvectordev.c | 6 +++---
cuda/zvectordev.h | 7 ++++---
10 files changed, 28 insertions(+), 22 deletions(-)
diff --git a/cuda/cvectordev.c b/cuda/cvectordev.c
index 65d41893..b05bca55 100644
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
}
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i);
}
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i);
}
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
diff --git a/cuda/cvectordev.h b/cuda/cvectordev.h
index 8c40b95d..423da33e 100644
--- a/cuda/cvectordev.h
+++ b/cuda/cvectordev.h
@@ -37,6 +37,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
+#include "vector.h"
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
diff --git a/cuda/dnsdev.c b/cuda/dnsdev.c
index 0a991012..3cf57976 100644
--- a/cuda/dnsdev.c
+++ b/cuda/dnsdev.c
@@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasCgemv(handle, trans, m,k,
- alpha, devMat->cM,devMat->pitch, x->v_,1,
- beta, y->v_,1);
+ (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
+ (const cuComplex *) beta, y->v_,1);
} else {
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
- alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
- beta, y->v_,y->pitch_);
+ (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+ (const cuComplex *) beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
@@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasZgemv(handle, trans, m,k,
- alpha, devMat->cM,devMat->pitch, x->v_,1,
- beta, y->v_,1);
+ (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
+ (const cuDoubleComplex *) beta, y->v_,1);
} else {
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
- alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
- beta, y->v_,y->pitch_);
+ (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+ (const cuDoubleComplex *) beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
diff --git a/cuda/dvectordev.h b/cuda/dvectordev.h
index 3834c0d3..0d2d2ab3 100644
--- a/cuda/dvectordev.h
+++ b/cuda/dvectordev.h
@@ -35,6 +35,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
+#include "vector.h"
int registerMappedDouble(void *, void **, int, double);
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
diff --git a/cuda/fcusparse_fct.h b/cuda/fcusparse_fct.h
index 12be21bd..ec7166f5 100644
--- a/cuda/fcusparse_fct.h
+++ b/cuda/fcusparse_fct.h
@@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type;
- CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+ CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode,
sizeof(cusparseFillMode_t)));
@@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type;
- CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+ CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype,
sizeof(cusparseDiagType_t)));
diff --git a/cuda/ivectordev.h b/cuda/ivectordev.h
index 2db54be4..6f3a32a0 100644
--- a/cuda/ivectordev.h
+++ b/cuda/ivectordev.h
@@ -35,6 +35,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
+#include "vector.h"
int registerMappedInt(void *, void **, int, int);
int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);
diff --git a/cuda/svectordev.h b/cuda/svectordev.h
index d5c85f78..887a7755 100644
--- a/cuda/svectordev.h
+++ b/cuda/svectordev.h
@@ -35,6 +35,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
+#include "vector.h"
int registerMappedFloat(void *, void **, int, float);
int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);
diff --git a/cuda/vectordev.h b/cuda/vectordev.h
index df5fbd82..93cf1189 100644
--- a/cuda/vectordev.h
+++ b/cuda/vectordev.h
@@ -34,6 +34,7 @@
#include "cuda_runtime.h"
//#include "common.h"
//#include "cintrf.h"
+#include "cuda_util.h"
#include
struct MultiVectDevice
diff --git a/cuda/zvectordev.c b/cuda/zvectordev.c
index 3a5b0738..49741582 100644
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
}
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i);
}
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i);
}
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
diff --git a/cuda/zvectordev.h b/cuda/zvectordev.h
index e15802f0..023c7f13 100644
--- a/cuda/zvectordev.h
+++ b/cuda/zvectordev.h
@@ -37,6 +37,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
+#include "vector.h"
int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
int indexBase, cuDoubleComplex beta);
int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
void* devVecA, void* devVecB);
From 5430ba0e222415e60e5857b236fb7f87b83c160c Mon Sep 17 00:00:00 2001
From: Salvatore Filippone
Date: Sat, 2 Nov 2024 12:28:08 +0100
Subject: [PATCH 56/86] Fix multivect constructor in CUDA
---
cuda/psb_c_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_d_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_i_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_s_cuda_vect_mod.F90 | 8 ++++----
cuda/psb_z_cuda_vect_mod.F90 | 8 ++++----
5 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/cuda/psb_c_cuda_vect_mod.F90 b/cuda/psb_c_cuda_vect_mod.F90
index 9755b386..752d2bf1 100644
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_c_cuda_multivect_mod
end type psb_c_multivect_cuda
public :: psb_c_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_c_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
complex(psb_spk_) :: x(:,:)
type(psb_c_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_d_cuda_vect_mod.F90 b/cuda/psb_d_cuda_vect_mod.F90
index dfa83c60..4e17be02 100644
--- a/cuda/psb_d_cuda_vect_mod.F90
+++ b/cuda/psb_d_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_d_cuda_multivect_mod
end type psb_d_multivect_cuda
public :: psb_d_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_d_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
real(psb_dpk_) :: x(:,:)
type(psb_d_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_i_cuda_vect_mod.F90 b/cuda/psb_i_cuda_vect_mod.F90
index 4be4679c..eeafe90e 100644
--- a/cuda/psb_i_cuda_vect_mod.F90
+++ b/cuda/psb_i_cuda_vect_mod.F90
@@ -937,14 +937,14 @@ module psb_i_cuda_multivect_mod
end type psb_i_multivect_cuda
public :: psb_i_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_i_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
integer(psb_ipk_) :: x(:,:)
type(psb_i_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -952,7 +952,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_s_cuda_vect_mod.F90 b/cuda/psb_s_cuda_vect_mod.F90
index 39a108ab..3006ebd8 100644
--- a/cuda/psb_s_cuda_vect_mod.F90
+++ b/cuda/psb_s_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_s_cuda_multivect_mod
end type psb_s_multivect_cuda
public :: psb_s_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_s_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
real(psb_spk_) :: x(:,:)
type(psb_s_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y)
diff --git a/cuda/psb_z_cuda_vect_mod.F90 b/cuda/psb_z_cuda_vect_mod.F90
index d4318bea..f4860911 100644
--- a/cuda/psb_z_cuda_vect_mod.F90
+++ b/cuda/psb_z_cuda_vect_mod.F90
@@ -1377,14 +1377,14 @@ module psb_z_cuda_multivect_mod
end type psb_z_multivect_cuda
public :: psb_z_multivect_cuda
- private :: constructor
+ private :: mconstructor
interface psb_z_multivect_cuda
- module procedure constructor
+ module procedure mconstructor
end interface
contains
- function constructor(x) result(this)
+ function mconstructor(x) result(this)
complex(psb_dpk_) :: x(:,:)
type(psb_z_multivect_cuda) :: this
integer(psb_ipk_) :: info
@@ -1392,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
- end function constructor
+ end function mconstructor
!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y)
From f10c6c18225578e4719d65d2975eec1b22d6b730 Mon Sep 17 00:00:00 2001
From: Salvatore Filippone
Date: Sat, 2 Nov 2024 12:28:24 +0100
Subject: [PATCH 57/86] Fix GEPRT
---
base/serial/psb_cgeprt.f90 | 5 +++--
base/serial/psb_dgeprt.f90 | 9 +++++----
base/serial/psb_sgeprt.f90 | 9 +++++----
base/serial/psb_zgeprt.f90 | 5 +++--
4 files changed, 16 insertions(+), 12 deletions(-)
diff --git a/base/serial/psb_cgeprt.f90 b/base/serial/psb_cgeprt.f90
index 985c1eab..e05d673b 100644
--- a/base/serial/psb_cgeprt.f90
+++ b/base/serial/psb_cgeprt.f90
@@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
-! File: psb_scsprt.f90
+! File: psb_geprt.f90
! Subroutine:
! Arguments:
@@ -131,6 +131,7 @@ subroutine psb_cgeprt2(iout,a,head)
ncol = size(a,2)
write(iout,*) nrow,ncol
+
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
do i=1,nrow
@@ -161,7 +162,7 @@ subroutine psb_cgeprt1(iout,a,head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
- write(iout,*) nrow
+ write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
diff --git a/base/serial/psb_dgeprt.f90 b/base/serial/psb_dgeprt.f90
index 07fb32f8..ae4c5b11 100644
--- a/base/serial/psb_dgeprt.f90
+++ b/base/serial/psb_dgeprt.f90
@@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
-! File: psb_dcsprt.f90
+! File: psb_geprt.f90
! Subroutine:
! Arguments:
@@ -124,13 +124,14 @@ subroutine psb_dgeprt2(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
- write(iout,'(a)') '%%MatrixMarket matrix array real general'
+ write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = size(a,2)
write(iout,*) nrow,ncol
+
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
do i=1,nrow
@@ -156,12 +157,12 @@ subroutine psb_dgeprt1(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
- write(iout,'(a)') '%%MatrixMarket matrix array real general'
+ write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
- write(iout,*) nrow
+ write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
diff --git a/base/serial/psb_sgeprt.f90 b/base/serial/psb_sgeprt.f90
index 3ebb975b..9f3205fd 100644
--- a/base/serial/psb_sgeprt.f90
+++ b/base/serial/psb_sgeprt.f90
@@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
-! File: psb_scsprt.f90
+! File: psb_geprt.f90
! Subroutine:
! Arguments:
@@ -124,13 +124,14 @@ subroutine psb_sgeprt2(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
- write(iout,'(a)') '%%MatrixMarket matrix array real general'
+ write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = size(a,2)
write(iout,*) nrow,ncol
+
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
do i=1,nrow
@@ -156,12 +157,12 @@ subroutine psb_sgeprt1(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
- write(iout,'(a)') '%%MatrixMarket matrix array real general'
+ write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
- write(iout,*) nrow
+ write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
diff --git a/base/serial/psb_zgeprt.f90 b/base/serial/psb_zgeprt.f90
index f7615473..3fc0eb0c 100644
--- a/base/serial/psb_zgeprt.f90
+++ b/base/serial/psb_zgeprt.f90
@@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
-! File: psb_scsprt.f90
+! File: psb_geprt.f90
! Subroutine:
! Arguments:
@@ -131,6 +131,7 @@ subroutine psb_zgeprt2(iout,a,head)
ncol = size(a,2)
write(iout,*) nrow,ncol
+
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
do i=1,nrow
@@ -161,7 +162,7 @@ subroutine psb_zgeprt1(iout,a,head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
- write(iout,*) nrow
+ write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
From 029903dbadc9042a0c0270644ceff324b0dd0e93 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Fri, 8 Nov 2024 18:22:43 +0100
Subject: [PATCH 58/86] New Richardson method.
---
krylov/Makefile | 1 +
krylov/psb_ckrylov.f90 | 1 +
krylov/psb_crichardson.f90 | 216 ++++++++++++++++++++++++++++++++++++
krylov/psb_dkrylov.f90 | 1 +
krylov/psb_drichardson.f90 | 216 ++++++++++++++++++++++++++++++++++++
krylov/psb_krylov_mod.f90 | 84 ++++++++++++++
krylov/psb_skrylov.f90 | 1 +
krylov/psb_srichardson.f90 | 216 ++++++++++++++++++++++++++++++++++++
krylov/psb_zkrylov.f90 | 1 +
krylov/psb_zrichardson.f90 | 216 ++++++++++++++++++++++++++++++++++++
test/cudakern/dpdegenmv.F90 | 10 +-
test/cudakern/spdegenmv.F90 | 14 ++-
12 files changed, 971 insertions(+), 6 deletions(-)
create mode 100644 krylov/psb_crichardson.f90
create mode 100644 krylov/psb_drichardson.f90
create mode 100644 krylov/psb_srichardson.f90
create mode 100644 krylov/psb_zrichardson.f90
diff --git a/krylov/Makefile b/krylov/Makefile
index f71dbb1a..c08d6740 100644
--- a/krylov/Makefile
+++ b/krylov/Makefile
@@ -11,6 +11,7 @@ MODOBJS= psb_base_krylov_conv_mod.o \
psb_d_krylov_conv_mod.o psb_z_krylov_conv_mod.o \
psb_krylov_mod.o
F90OBJS=psb_dkrylov.o psb_skrylov.o psb_ckrylov.o psb_zkrylov.o \
+ psb_drichardson.o psb_srichardson.o psb_crichardson.o psb_zrichardson.o \
psb_dcgstab.o psb_dcg.o psb_dfcg.o psb_dgcr.o psb_dcgs.o \
psb_dbicg.o psb_dcgstabl.o psb_drgmres.o\
psb_scgstab.o psb_scg.o psb_sfcg.o psb_sgcr.o psb_scgs.o \
diff --git a/krylov/psb_ckrylov.f90 b/krylov/psb_ckrylov.f90
index 01228234..3afa525e 100644
--- a/krylov/psb_ckrylov.f90
+++ b/krylov/psb_ckrylov.f90
@@ -42,6 +42,7 @@
!
! methd - character The specific method; can take the values:
! CG
+! FCG
! CGS
! BICG
! BICGSTAB
diff --git a/krylov/psb_crichardson.f90 b/krylov/psb_crichardson.f90
new file mode 100644
index 00000000..08678653
--- /dev/null
+++ b/krylov/psb_crichardson.f90
@@ -0,0 +1,216 @@
+!
+! Parallel Sparse BLAS version 3.5
+! (C) Copyright 2006-2018
+! Salvatore Filippone
+! Alfredo Buttari
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+! File: psb_richardson_mod.f90
+! Interfaces for Richardson iterative methods.
+!
+!
+! Subroutine: psb_crichardson
+!
+! Front-end for the Richardson iterations, complexversion
+!
+! Arguments:
+!
+! a - type(psb_cspmat_type) Input: sparse matrix containing A.
+! prec - class(psb_cprec_type) Input: preconditioner
+! b - complex,dimension(:) Input: vector containing the
+! right hand side B
+! x - complex,dimension(:) Input/Output: vector containing the
+! initial guess and final solution X.
+! eps - real Input: Stopping tolerance; the iteration is
+! stopped when the error
+! estimate |err| <= eps
+!
+! desc_a - type(psb_desc_type). Input: The communication descriptor.
+! info - integer. Output: Return code
+!
+! itmax - integer(optional) Input: maximum number of iterations to be
+! performed.
+! iter - integer(optional) Output: how many iterations have been
+! performed.
+! err - real (optional) Output: error estimate on exit
+! itrace - integer(optional) Input: print an informational message
+! with the error estimate every itrace
+! iterations
+! istop - integer(optional) Input: stopping criterion, or how
+! to estimate the error.
+! 1: err = |r|/(|a||x|+|b|)
+! 2: err = |r|/|b|
+! where r is the (preconditioned, recursive
+! estimate of) residual
+!
+Subroutine psb_crichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod
+ use psb_prec_mod
+ use psb_c_krylov_conv_mod
+ use psb_krylov_mod, psb_protect_name => psb_crichardson_vect
+
+ Type(psb_cspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_cprec_type), intent(inout) :: prec
+ type(psb_c_vect_type), Intent(inout) :: b
+ type(psb_c_vect_type), Intent(inout) :: x
+ Real(psb_spk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_spk_), Optional, Intent(out) :: err
+
+
+ logical :: do_alloc_wrk
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: me,np,err_act
+ complex(psb_spk_), allocatable, target :: aux(:)
+ type(psb_c_vect_type), allocatable, target :: wwrk(:)
+ type(psb_c_vect_type), pointer :: q, p, r, z, w
+ real(psb_dpk_) :: derr
+ integer(psb_ipk_) :: itmax_, istop_, naux, it, itx, itrace_,&
+ & n_col, n_row,ieg,nspl, istebz
+ integer(psb_lpk_) :: mglob
+ integer(psb_ipk_) :: debug_level, debug_unit
+ type(psb_itconv_type) :: stopdat
+ character(len=20) :: name
+ character(len=*), parameter :: methdname='RICHARDSON'
+
+ info = psb_success_
+ name = 'psb_crichardson'
+ call psb_erractionsave(err_act)
+
+ ctxt=desc_a%get_context()
+
+ call psb_info(ctxt, me, np)
+
+ if (present(itrace)) then
+ itrace_ = itrace
+ else
+ itrace_ = -1
+ end if
+
+ if (present(istop)) then
+ istop_ = istop
+ else
+ istop_ = 2
+ endif
+ if (present(itmax)) then
+ itmax_ = itmax
+ else
+ itmax_ = 1000
+ endif
+
+ do_alloc_wrk = .not.prec%is_allocated_wrk()
+ if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+
+ if (.not.allocated(b%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+ if (.not.allocated(x%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+
+ mglob = desc_a%get_global_rows()
+ n_row = desc_a%get_local_rows()
+ n_col = desc_a%get_local_cols()
+
+ call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+ if (info == psb_success_)&
+ & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+ goto 9999
+ end if
+
+ naux=4*n_col
+ allocate(aux(naux), stat=info)
+ if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=5_psb_ipk_)
+ if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+ p => wwrk(1)
+ q => wwrk(2)
+ r => wwrk(3)
+ z => wwrk(4)
+ w => wwrk(5)
+
+ call psb_geaxpby(cone,b,czero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-cone,a,x,cone,r,desc_a,info,work=aux)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+
+
+ call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+ if (info /= psb_success_) Then
+ call psb_errpush(psb_err_from_subroutine_non_,name)
+ goto 9999
+ End If
+
+ loop: do itx=1,itmax_
+ call prec%apply(r,z,desc_a,info,work=aux)
+ call psb_geaxpby(cone,z,cone,x,desc_a,info)
+ call psb_geaxpby(cone,b,czero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-cone,a,x,cone,r,desc_a,info,work=aux)
+ if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+ end do loop
+ call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+ if (present(err)) err = derr
+
+ if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+ if (info == psb_success_) deallocate(aux,stat=info)
+ if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+ if(info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err=trim(methdname))
+ goto 9999
+ end if
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+ return
+
+end subroutine psb_crichardson_vect
+
diff --git a/krylov/psb_dkrylov.f90 b/krylov/psb_dkrylov.f90
index d5d40eaf..2bc24d6a 100644
--- a/krylov/psb_dkrylov.f90
+++ b/krylov/psb_dkrylov.f90
@@ -42,6 +42,7 @@
!
! methd - character The specific method; can take the values:
! CG
+! FCG
! CGS
! BICG
! BICGSTAB
diff --git a/krylov/psb_drichardson.f90 b/krylov/psb_drichardson.f90
new file mode 100644
index 00000000..2c057320
--- /dev/null
+++ b/krylov/psb_drichardson.f90
@@ -0,0 +1,216 @@
+!
+! Parallel Sparse BLAS version 3.5
+! (C) Copyright 2006-2018
+! Salvatore Filippone
+! Alfredo Buttari
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+! File: psb_richardson_mod.f90
+! Interfaces for Richardson iterative methods.
+!
+!
+! Subroutine: psb_drichardson
+!
+! Front-end for the Richardson iterations, realversion
+!
+! Arguments:
+!
+! a - type(psb_dspmat_type) Input: sparse matrix containing A.
+! prec - class(psb_dprec_type) Input: preconditioner
+! b - real,dimension(:) Input: vector containing the
+! right hand side B
+! x - real,dimension(:) Input/Output: vector containing the
+! initial guess and final solution X.
+! eps - real Input: Stopping tolerance; the iteration is
+! stopped when the error
+! estimate |err| <= eps
+!
+! desc_a - type(psb_desc_type). Input: The communication descriptor.
+! info - integer. Output: Return code
+!
+! itmax - integer(optional) Input: maximum number of iterations to be
+! performed.
+! iter - integer(optional) Output: how many iterations have been
+! performed.
+! err - real (optional) Output: error estimate on exit
+! itrace - integer(optional) Input: print an informational message
+! with the error estimate every itrace
+! iterations
+! istop - integer(optional) Input: stopping criterion, or how
+! to estimate the error.
+! 1: err = |r|/(|a||x|+|b|)
+! 2: err = |r|/|b|
+! where r is the (preconditioned, recursive
+! estimate of) residual
+!
+Subroutine psb_drichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod
+ use psb_prec_mod
+ use psb_d_krylov_conv_mod
+ use psb_krylov_mod, psb_protect_name => psb_drichardson_vect
+
+ Type(psb_dspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_dprec_type), intent(inout) :: prec
+ type(psb_d_vect_type), Intent(inout) :: b
+ type(psb_d_vect_type), Intent(inout) :: x
+ Real(psb_dpk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_dpk_), Optional, Intent(out) :: err
+
+
+ logical :: do_alloc_wrk
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: me,np,err_act
+ real(psb_dpk_), allocatable, target :: aux(:)
+ type(psb_d_vect_type), allocatable, target :: wwrk(:)
+ type(psb_d_vect_type), pointer :: q, p, r, z, w
+ real(psb_dpk_) :: derr
+ integer(psb_ipk_) :: itmax_, istop_, naux, it, itx, itrace_,&
+ & n_col, n_row,ieg,nspl, istebz
+ integer(psb_lpk_) :: mglob
+ integer(psb_ipk_) :: debug_level, debug_unit
+ type(psb_itconv_type) :: stopdat
+ character(len=20) :: name
+ character(len=*), parameter :: methdname='RICHARDSON'
+
+ info = psb_success_
+ name = 'psb_drichardson'
+ call psb_erractionsave(err_act)
+
+ ctxt=desc_a%get_context()
+
+ call psb_info(ctxt, me, np)
+
+ if (present(itrace)) then
+ itrace_ = itrace
+ else
+ itrace_ = -1
+ end if
+
+ if (present(istop)) then
+ istop_ = istop
+ else
+ istop_ = 2
+ endif
+ if (present(itmax)) then
+ itmax_ = itmax
+ else
+ itmax_ = 1000
+ endif
+
+ do_alloc_wrk = .not.prec%is_allocated_wrk()
+ if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+
+ if (.not.allocated(b%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+ if (.not.allocated(x%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+
+ mglob = desc_a%get_global_rows()
+ n_row = desc_a%get_local_rows()
+ n_col = desc_a%get_local_cols()
+
+ call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+ if (info == psb_success_)&
+ & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+ goto 9999
+ end if
+
+ naux=4*n_col
+ allocate(aux(naux), stat=info)
+ if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=5_psb_ipk_)
+ if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+ p => wwrk(1)
+ q => wwrk(2)
+ r => wwrk(3)
+ z => wwrk(4)
+ w => wwrk(5)
+
+ call psb_geaxpby(done,b,dzero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+
+
+ call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+ if (info /= psb_success_) Then
+ call psb_errpush(psb_err_from_subroutine_non_,name)
+ goto 9999
+ End If
+
+ loop: do itx=1,itmax_
+ call prec%apply(r,z,desc_a,info,work=aux)
+ call psb_geaxpby(done,z,done,x,desc_a,info)
+ call psb_geaxpby(done,b,dzero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-done,a,x,done,r,desc_a,info,work=aux)
+ if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+ end do loop
+ call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+ if (present(err)) err = derr
+
+ if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+ if (info == psb_success_) deallocate(aux,stat=info)
+ if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+ if(info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err=trim(methdname))
+ goto 9999
+ end if
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+ return
+
+end subroutine psb_drichardson_vect
+
diff --git a/krylov/psb_krylov_mod.f90 b/krylov/psb_krylov_mod.f90
index d8d4d904..e9a94e18 100644
--- a/krylov/psb_krylov_mod.f90
+++ b/krylov/psb_krylov_mod.f90
@@ -127,4 +127,88 @@ Module psb_krylov_mod
end interface
+ interface psb_richardson
+
+ Subroutine psb_srichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_sspmat_type, &
+ & psb_spk_, psb_s_vect_type
+ use psb_prec_mod, only : psb_sprec_type
+
+ Type(psb_sspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_sprec_type), intent(inout) :: prec
+ type(psb_s_vect_type), Intent(inout) :: b
+ type(psb_s_vect_type), Intent(inout) :: x
+ Real(psb_spk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_spk_), Optional, Intent(out) :: err
+
+ end Subroutine psb_srichardson_vect
+
+ Subroutine psb_crichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_cspmat_type, &
+ & psb_spk_, psb_c_vect_type
+ use psb_prec_mod, only : psb_cprec_type
+
+ Type(psb_cspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_cprec_type), intent(inout) :: prec
+ type(psb_c_vect_type), Intent(inout) :: b
+ type(psb_c_vect_type), Intent(inout) :: x
+ Real(psb_spk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_spk_), Optional, Intent(out) :: err
+
+ end Subroutine psb_crichardson_vect
+
+ Subroutine psb_drichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_dspmat_type, &
+ & psb_dpk_, psb_d_vect_type
+ use psb_prec_mod, only : psb_dprec_type
+
+ Type(psb_dspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_dprec_type), intent(inout) :: prec
+ type(psb_d_vect_type), Intent(inout) :: b
+ type(psb_d_vect_type), Intent(inout) :: x
+ Real(psb_dpk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_dpk_), Optional, Intent(out) :: err
+
+ end Subroutine psb_drichardson_vect
+
+ Subroutine psb_zrichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod, only : psb_ipk_, psb_desc_type, psb_zspmat_type, &
+ & psb_dpk_, psb_z_vect_type
+ use psb_prec_mod, only : psb_zprec_type
+
+ Type(psb_zspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_zprec_type), intent(inout) :: prec
+ type(psb_z_vect_type), Intent(inout) :: b
+ type(psb_z_vect_type), Intent(inout) :: x
+ Real(psb_dpk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_dpk_), Optional, Intent(out) :: err
+
+ end Subroutine psb_zrichardson_vect
+
+ end interface
+
end module psb_krylov_mod
diff --git a/krylov/psb_skrylov.f90 b/krylov/psb_skrylov.f90
index 39aecc36..35d2024f 100644
--- a/krylov/psb_skrylov.f90
+++ b/krylov/psb_skrylov.f90
@@ -42,6 +42,7 @@
!
! methd - character The specific method; can take the values:
! CG
+! FCG
! CGS
! BICG
! BICGSTAB
diff --git a/krylov/psb_srichardson.f90 b/krylov/psb_srichardson.f90
new file mode 100644
index 00000000..a06f6cb4
--- /dev/null
+++ b/krylov/psb_srichardson.f90
@@ -0,0 +1,216 @@
+!
+! Parallel Sparse BLAS version 3.5
+! (C) Copyright 2006-2018
+! Salvatore Filippone
+! Alfredo Buttari
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+! File: psb_richardson_mod.f90
+! Interfaces for Richardson iterative methods.
+!
+!
+! Subroutine: psb_srichardson
+!
+! Front-end for the Richardson iterations, realversion
+!
+! Arguments:
+!
+! a - type(psb_sspmat_type) Input: sparse matrix containing A.
+! prec - class(psb_sprec_type) Input: preconditioner
+! b - real,dimension(:) Input: vector containing the
+! right hand side B
+! x - real,dimension(:) Input/Output: vector containing the
+! initial guess and final solution X.
+! eps - real Input: Stopping tolerance; the iteration is
+! stopped when the error
+! estimate |err| <= eps
+!
+! desc_a - type(psb_desc_type). Input: The communication descriptor.
+! info - integer. Output: Return code
+!
+! itmax - integer(optional) Input: maximum number of iterations to be
+! performed.
+! iter - integer(optional) Output: how many iterations have been
+! performed.
+! err - real (optional) Output: error estimate on exit
+! itrace - integer(optional) Input: print an informational message
+! with the error estimate every itrace
+! iterations
+! istop - integer(optional) Input: stopping criterion, or how
+! to estimate the error.
+! 1: err = |r|/(|a||x|+|b|)
+! 2: err = |r|/|b|
+! where r is the (preconditioned, recursive
+! estimate of) residual
+!
+Subroutine psb_srichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod
+ use psb_prec_mod
+ use psb_s_krylov_conv_mod
+ use psb_krylov_mod, psb_protect_name => psb_srichardson_vect
+
+ Type(psb_sspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_sprec_type), intent(inout) :: prec
+ type(psb_s_vect_type), Intent(inout) :: b
+ type(psb_s_vect_type), Intent(inout) :: x
+ Real(psb_spk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_spk_), Optional, Intent(out) :: err
+
+
+ logical :: do_alloc_wrk
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: me,np,err_act
+ real(psb_spk_), allocatable, target :: aux(:)
+ type(psb_s_vect_type), allocatable, target :: wwrk(:)
+ type(psb_s_vect_type), pointer :: q, p, r, z, w
+ real(psb_dpk_) :: derr
+ integer(psb_ipk_) :: itmax_, istop_, naux, it, itx, itrace_,&
+ & n_col, n_row,ieg,nspl, istebz
+ integer(psb_lpk_) :: mglob
+ integer(psb_ipk_) :: debug_level, debug_unit
+ type(psb_itconv_type) :: stopdat
+ character(len=20) :: name
+ character(len=*), parameter :: methdname='RICHARDSON'
+
+ info = psb_success_
+ name = 'psb_srichardson'
+ call psb_erractionsave(err_act)
+
+ ctxt=desc_a%get_context()
+
+ call psb_info(ctxt, me, np)
+
+ if (present(itrace)) then
+ itrace_ = itrace
+ else
+ itrace_ = -1
+ end if
+
+ if (present(istop)) then
+ istop_ = istop
+ else
+ istop_ = 2
+ endif
+ if (present(itmax)) then
+ itmax_ = itmax
+ else
+ itmax_ = 1000
+ endif
+
+ do_alloc_wrk = .not.prec%is_allocated_wrk()
+ if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+
+ if (.not.allocated(b%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+ if (.not.allocated(x%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+
+ mglob = desc_a%get_global_rows()
+ n_row = desc_a%get_local_rows()
+ n_col = desc_a%get_local_cols()
+
+ call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+ if (info == psb_success_)&
+ & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+ goto 9999
+ end if
+
+ naux=4*n_col
+ allocate(aux(naux), stat=info)
+ if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=5_psb_ipk_)
+ if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+ p => wwrk(1)
+ q => wwrk(2)
+ r => wwrk(3)
+ z => wwrk(4)
+ w => wwrk(5)
+
+ call psb_geaxpby(sone,b,szero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-sone,a,x,sone,r,desc_a,info,work=aux)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+
+
+ call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+ if (info /= psb_success_) Then
+ call psb_errpush(psb_err_from_subroutine_non_,name)
+ goto 9999
+ End If
+
+ loop: do itx=1,itmax_
+ call prec%apply(r,z,desc_a,info,work=aux)
+ call psb_geaxpby(sone,z,sone,x,desc_a,info)
+ call psb_geaxpby(sone,b,szero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-sone,a,x,sone,r,desc_a,info,work=aux)
+ if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+ end do loop
+ call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+ if (present(err)) err = derr
+
+ if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+ if (info == psb_success_) deallocate(aux,stat=info)
+ if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+ if(info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err=trim(methdname))
+ goto 9999
+ end if
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+ return
+
+end subroutine psb_srichardson_vect
+
diff --git a/krylov/psb_zkrylov.f90 b/krylov/psb_zkrylov.f90
index a70cc98a..bcfd6806 100644
--- a/krylov/psb_zkrylov.f90
+++ b/krylov/psb_zkrylov.f90
@@ -42,6 +42,7 @@
!
! methd - character The specific method; can take the values:
! CG
+! FCG
! CGS
! BICG
! BICGSTAB
diff --git a/krylov/psb_zrichardson.f90 b/krylov/psb_zrichardson.f90
new file mode 100644
index 00000000..19f2f10e
--- /dev/null
+++ b/krylov/psb_zrichardson.f90
@@ -0,0 +1,216 @@
+!
+! Parallel Sparse BLAS version 3.5
+! (C) Copyright 2006-2018
+! Salvatore Filippone
+! Alfredo Buttari
+!
+! Redistribution and use in source and binary forms, with or without
+! modification, are permitted provided that the following conditions
+! are met:
+! 1. Redistributions of source code must retain the above copyright
+! notice, this list of conditions and the following disclaimer.
+! 2. Redistributions in binary form must reproduce the above copyright
+! notice, this list of conditions, and the following disclaimer in the
+! documentation and/or other materials provided with the distribution.
+! 3. The name of the PSBLAS group or the names of its contributors may
+! not be used to endorse or promote products derived from this
+! software without specific written permission.
+!
+! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+! ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+! TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+! PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE PSBLAS GROUP OR ITS CONTRIBUTORS
+! BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+! CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+! SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+! INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+! CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+! ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+! POSSIBILITY OF SUCH DAMAGE.
+!
+!
+!
+! File: psb_richardson_mod.f90
+! Interfaces for Richardson iterative methods.
+!
+!
+! Subroutine: psb_zrichardson
+!
+! Front-end for the Richardson iterations, complexversion
+!
+! Arguments:
+!
+! a - type(psb_zspmat_type) Input: sparse matrix containing A.
+! prec - class(psb_zprec_type) Input: preconditioner
+! b - complex,dimension(:) Input: vector containing the
+! right hand side B
+! x - complex,dimension(:) Input/Output: vector containing the
+! initial guess and final solution X.
+! eps - real Input: Stopping tolerance; the iteration is
+! stopped when the error
+! estimate |err| <= eps
+!
+! desc_a - type(psb_desc_type). Input: The communication descriptor.
+! info - integer. Output: Return code
+!
+! itmax - integer(optional) Input: maximum number of iterations to be
+! performed.
+! iter - integer(optional) Output: how many iterations have been
+! performed.
+! err - real (optional) Output: error estimate on exit
+! itrace - integer(optional) Input: print an informational message
+! with the error estimate every itrace
+! iterations
+! istop - integer(optional) Input: stopping criterion, or how
+! to estimate the error.
+! 1: err = |r|/(|a||x|+|b|)
+! 2: err = |r|/|b|
+! where r is the (preconditioned, recursive
+! estimate of) residual
+!
+Subroutine psb_zrichardson_vect(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+ use psb_base_mod
+ use psb_prec_mod
+ use psb_z_krylov_conv_mod
+ use psb_krylov_mod, psb_protect_name => psb_zrichardson_vect
+
+ Type(psb_zspmat_type), Intent(in) :: a
+ Type(psb_desc_type), Intent(in) :: desc_a
+ class(psb_zprec_type), intent(inout) :: prec
+ type(psb_z_vect_type), Intent(inout) :: b
+ type(psb_z_vect_type), Intent(inout) :: x
+ Real(psb_dpk_), Intent(in) :: eps
+ integer(psb_ipk_), intent(out) :: info
+ integer(psb_ipk_), Optional, Intent(in) :: itmax, itrace, istop
+ integer(psb_ipk_), Optional, Intent(out) :: iter
+ Real(psb_dpk_), Optional, Intent(out) :: err
+
+
+ logical :: do_alloc_wrk
+ type(psb_ctxt_type) :: ctxt
+ integer(psb_ipk_) :: me,np,err_act
+ complex(psb_dpk_), allocatable, target :: aux(:)
+ type(psb_z_vect_type), allocatable, target :: wwrk(:)
+ type(psb_z_vect_type), pointer :: q, p, r, z, w
+ real(psb_dpk_) :: derr
+ integer(psb_ipk_) :: itmax_, istop_, naux, it, itx, itrace_,&
+ & n_col, n_row,ieg,nspl, istebz
+ integer(psb_lpk_) :: mglob
+ integer(psb_ipk_) :: debug_level, debug_unit
+ type(psb_itconv_type) :: stopdat
+ character(len=20) :: name
+ character(len=*), parameter :: methdname='RICHARDSON'
+
+ info = psb_success_
+ name = 'psb_zrichardson'
+ call psb_erractionsave(err_act)
+
+ ctxt=desc_a%get_context()
+
+ call psb_info(ctxt, me, np)
+
+ if (present(itrace)) then
+ itrace_ = itrace
+ else
+ itrace_ = -1
+ end if
+
+ if (present(istop)) then
+ istop_ = istop
+ else
+ istop_ = 2
+ endif
+ if (present(itmax)) then
+ itmax_ = itmax
+ else
+ itmax_ = 1000
+ endif
+
+ do_alloc_wrk = .not.prec%is_allocated_wrk()
+ if (do_alloc_wrk) call prec%allocate_wrk(info,vmold=x%v,desc=desc_a)
+
+ if (.not.allocated(b%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+ if (.not.allocated(x%v)) then
+ info = psb_err_invalid_vect_state_
+ call psb_errpush(info,name)
+ goto 9999
+ endif
+
+ mglob = desc_a%get_global_rows()
+ n_row = desc_a%get_local_rows()
+ n_col = desc_a%get_local_cols()
+
+ call psb_chkvect(mglob,lone,x%get_nrows(),lone,lone,desc_a,info)
+ if (info == psb_success_)&
+ & call psb_chkvect(mglob,lone,b%get_nrows(),lone,lone,desc_a,info)
+ if(info /= psb_success_) then
+ info=psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err='psb_chkvect on X/B')
+ goto 9999
+ end if
+
+ naux=4*n_col
+ allocate(aux(naux), stat=info)
+ if (info == psb_success_) call psb_geall(wwrk,desc_a,info,n=5_psb_ipk_)
+ if (info == psb_success_) call psb_geasb(wwrk,desc_a,info,mold=x%v,scratch=.true.)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+ p => wwrk(1)
+ q => wwrk(2)
+ r => wwrk(3)
+ z => wwrk(4)
+ w => wwrk(5)
+
+ call psb_geaxpby(zone,b,zzero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-zone,a,x,zone,r,desc_a,info,work=aux)
+ if (info /= psb_success_) then
+ info=psb_err_from_subroutine_non_
+ call psb_errpush(info,name)
+ goto 9999
+ end if
+
+
+ call psb_init_conv(methdname,istop_,itrace_,itmax_,a,x,b,eps,desc_a,stopdat,info)
+ if (info /= psb_success_) Then
+ call psb_errpush(psb_err_from_subroutine_non_,name)
+ goto 9999
+ End If
+
+ loop: do itx=1,itmax_
+ call prec%apply(r,z,desc_a,info,work=aux)
+ call psb_geaxpby(zone,z,zone,x,desc_a,info)
+ call psb_geaxpby(zone,b,zzero,r,desc_a,info)
+ if (info == psb_success_) call psb_spmm(-zone,a,x,zone,r,desc_a,info,work=aux)
+ if (psb_check_conv(methdname,itx,x,r,desc_a,stopdat,info)) exit loop
+ end do loop
+ call psb_end_conv(methdname,itx,desc_a,stopdat,info,derr,iter)
+ if (present(err)) err = derr
+
+ if (info == psb_success_) call psb_gefree(wwrk,desc_a,info)
+ if (info == psb_success_) deallocate(aux,stat=info)
+ if ((info==psb_success_).and.do_alloc_wrk) call prec%free_wrk(info)
+
+ if(info /= psb_success_) then
+ info = psb_err_from_subroutine_
+ call psb_errpush(info,name,a_err=trim(methdname))
+ goto 9999
+ end if
+
+ call psb_erractionrestore(err_act)
+ return
+
+9999 call psb_error_handler(ctxt,err_act)
+
+ return
+
+end subroutine psb_zrichardson_vect
+
diff --git a/test/cudakern/dpdegenmv.F90 b/test/cudakern/dpdegenmv.F90
index 85059e81..a5463f0b 100644
--- a/test/cudakern/dpdegenmv.F90
+++ b/test/cudakern/dpdegenmv.F90
@@ -594,7 +594,7 @@ program pdgenmv
! solver parameters
integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
- real(psb_dpk_) :: err, eps
+ real(psb_dpk_) :: err, eps, tnv, tng,tdot, dnrm2,ddot
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_d_coo_sparse_mat), target :: acoo
type(psb_d_csr_sparse_mat), target :: acsr
@@ -745,7 +745,7 @@ program pdgenmv
call psb_geall(x0,desc_a,info)
do i=1, nr
call desc_a%l2g(i,ig,info)
- x0(i) = 1.0 + (1.0*ig)/nrg
+ x0(i) = 1.0 + (1.0*ig)/(nrg**2)
end do
call a%cscnv(aux_a,info,mold=acoo)
tcnvcsr = 0
@@ -843,6 +843,12 @@ program pdgenmv
call bg%sync()
x1 = bv%get_vect()
x2 = bg%get_vect()
+ tnv = psb_genrm2(bv,desc_a,info)
+ tng = psb_genrm2(bg,desc_a,info)
+ tdot = psb_gedot(bg,bg,desc_a,info)
+ write(0,*) ' bv ',tnv,' bg ',tng, ' dot ',tdot,eps,&
+ & dnrm2(desc_a%get_local_rows(),x2,1),&
+ & ddot(desc_a%get_local_rows(),x1,1,x2,1)
call psb_geaxpby(-done,bg,+done,bv,desc_a,info)
eps = psb_geamax(bv,desc_a,info)
diff --git a/test/cudakern/spdegenmv.F90 b/test/cudakern/spdegenmv.F90
index f953e163..fbce6726 100644
--- a/test/cudakern/spdegenmv.F90
+++ b/test/cudakern/spdegenmv.F90
@@ -580,7 +580,7 @@ program pdgenmv
! solver parameters
integer(psb_epk_) :: amatsize, precsize, descsize, annz, nbytes
- real(psb_spk_) :: err, eps
+ real(psb_spk_) :: err, eps, tnv, tng,tdot, snrm2,sdot
integer, parameter :: ntests=200, ngpu=50, ncnv=20
type(psb_s_coo_sparse_mat), target :: acoo
type(psb_s_csr_sparse_mat), target :: acsr
@@ -728,7 +728,7 @@ program pdgenmv
call psb_geall(x0,desc_a,info)
do i=1, nr
call desc_a%l2g(i,ig,info)
- x0(i) = 1.0 + (1.0*ig)/nrg
+ x0(i) = 1.0 + (1.0*ig)/(nrg**2)
end do
call a%cscnv(aux_a,info,mold=acoo)
tcnvcsr = 0
@@ -826,10 +826,16 @@ program pdgenmv
call psb_amx(ctxt,gt2)
call bg%sync()
x1 = bv%get_vect()
- x2 = bg%get_vect()
+ x2 = bg%get_vect()
+ tnv = psb_genrm2(bv,desc_a,info)
+ tng = psb_genrm2(bg,desc_a,info)
+ tdot = psb_gedot(bg,bg,desc_a,info)
+ write(0,*) ' bv ',tnv,' bg ',tng, ' dot ',tdot,eps,&
+ & snrm2(desc_a%get_local_rows(),x2,1),&
+ & sdot(desc_a%get_local_rows(),x1,1,x2,1)
call psb_geaxpby(-sone,bg,+sone,bv,desc_a,info)
eps = psb_geamax(bv,desc_a,info)
-
+
call psb_amx(ctxt,t2)
eps = maxval(abs(x1(1:nr)-x2(1:nr)))
call psb_amx(ctxt,eps)
From ceac2faad0548d9b08704867c069f7f937cd968b Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Sun, 10 Nov 2024 10:08:50 +0100
Subject: [PATCH 59/86] Rename krylov into linsolve where needed, step 1.
---
Makefile | 18 +++++++++---------
{krylov => linsolve}/Makefile | 10 +++++-----
.../psb_base_linsolve_conv_mod.f90 | 8 ++++----
.../psb_c_linsolve_conv_mod.f90 | 8 ++++----
{krylov => linsolve}/psb_cbicg.f90 | 0
{krylov => linsolve}/psb_ccg.F90 | 0
{krylov => linsolve}/psb_ccgs.f90 | 0
{krylov => linsolve}/psb_ccgstab.f90 | 0
{krylov => linsolve}/psb_ccgstabl.f90 | 0
{krylov => linsolve}/psb_cfcg.F90 | 0
{krylov => linsolve}/psb_cgcr.f90 | 0
{krylov => linsolve}/psb_ckrylov.f90 | 0
{krylov => linsolve}/psb_crgmres.f90 | 0
{krylov => linsolve}/psb_crichardson.f90 | 0
.../psb_d_linsolve_conv_mod.f90 | 8 ++++----
{krylov => linsolve}/psb_dbicg.f90 | 0
{krylov => linsolve}/psb_dcg.F90 | 0
{krylov => linsolve}/psb_dcgs.f90 | 0
{krylov => linsolve}/psb_dcgstab.f90 | 0
{krylov => linsolve}/psb_dcgstabl.f90 | 0
{krylov => linsolve}/psb_dfcg.F90 | 0
{krylov => linsolve}/psb_dgcr.f90 | 0
{krylov => linsolve}/psb_dkrylov.f90 | 0
{krylov => linsolve}/psb_drgmres.f90 | 0
{krylov => linsolve}/psb_drichardson.f90 | 0
{krylov => linsolve}/psb_krylov_mod.f90 | 0
.../psb_linsolve_conv_mod.f90 | 16 ++++++++--------
.../psb_s_linsolve_conv_mod.f90 | 8 ++++----
{krylov => linsolve}/psb_sbicg.f90 | 0
{krylov => linsolve}/psb_scg.F90 | 0
{krylov => linsolve}/psb_scgs.f90 | 0
{krylov => linsolve}/psb_scgstab.f90 | 0
{krylov => linsolve}/psb_scgstabl.f90 | 0
{krylov => linsolve}/psb_sfcg.F90 | 0
{krylov => linsolve}/psb_sgcr.f90 | 0
{krylov => linsolve}/psb_skrylov.f90 | 0
{krylov => linsolve}/psb_srgmres.f90 | 0
{krylov => linsolve}/psb_srichardson.f90 | 0
.../psb_z_linsolve_conv_mod.f90 | 10 +++++-----
{krylov => linsolve}/psb_zbicg.f90 | 0
{krylov => linsolve}/psb_zcg.F90 | 0
{krylov => linsolve}/psb_zcgs.f90 | 0
{krylov => linsolve}/psb_zcgstab.f90 | 0
{krylov => linsolve}/psb_zcgstabl.f90 | 0
{krylov => linsolve}/psb_zfcg.F90 | 0
{krylov => linsolve}/psb_zgcr.f90 | 0
{krylov => linsolve}/psb_zkrylov.f90 | 0
{krylov => linsolve}/psb_zrgmres.f90 | 0
{krylov => linsolve}/psb_zrichardson.f90 | 0
49 files changed, 43 insertions(+), 43 deletions(-)
rename {krylov => linsolve}/Makefile (73%)
rename krylov/psb_base_krylov_conv_mod.f90 => linsolve/psb_base_linsolve_conv_mod.f90 (97%)
rename krylov/psb_c_krylov_conv_mod.f90 => linsolve/psb_c_linsolve_conv_mod.f90 (98%)
rename {krylov => linsolve}/psb_cbicg.f90 (100%)
rename {krylov => linsolve}/psb_ccg.F90 (100%)
rename {krylov => linsolve}/psb_ccgs.f90 (100%)
rename {krylov => linsolve}/psb_ccgstab.f90 (100%)
rename {krylov => linsolve}/psb_ccgstabl.f90 (100%)
rename {krylov => linsolve}/psb_cfcg.F90 (100%)
rename {krylov => linsolve}/psb_cgcr.f90 (100%)
rename {krylov => linsolve}/psb_ckrylov.f90 (100%)
rename {krylov => linsolve}/psb_crgmres.f90 (100%)
rename {krylov => linsolve}/psb_crichardson.f90 (100%)
rename krylov/psb_d_krylov_conv_mod.f90 => linsolve/psb_d_linsolve_conv_mod.f90 (98%)
rename {krylov => linsolve}/psb_dbicg.f90 (100%)
rename {krylov => linsolve}/psb_dcg.F90 (100%)
rename {krylov => linsolve}/psb_dcgs.f90 (100%)
rename {krylov => linsolve}/psb_dcgstab.f90 (100%)
rename {krylov => linsolve}/psb_dcgstabl.f90 (100%)
rename {krylov => linsolve}/psb_dfcg.F90 (100%)
rename {krylov => linsolve}/psb_dgcr.f90 (100%)
rename {krylov => linsolve}/psb_dkrylov.f90 (100%)
rename {krylov => linsolve}/psb_drgmres.f90 (100%)
rename {krylov => linsolve}/psb_drichardson.f90 (100%)
rename {krylov => linsolve}/psb_krylov_mod.f90 (100%)
rename krylov/psb_krylov_conv_mod.f90 => linsolve/psb_linsolve_conv_mod.f90 (86%)
rename krylov/psb_s_krylov_conv_mod.f90 => linsolve/psb_s_linsolve_conv_mod.f90 (98%)
rename {krylov => linsolve}/psb_sbicg.f90 (100%)
rename {krylov => linsolve}/psb_scg.F90 (100%)
rename {krylov => linsolve}/psb_scgs.f90 (100%)
rename {krylov => linsolve}/psb_scgstab.f90 (100%)
rename {krylov => linsolve}/psb_scgstabl.f90 (100%)
rename {krylov => linsolve}/psb_sfcg.F90 (100%)
rename {krylov => linsolve}/psb_sgcr.f90 (100%)
rename {krylov => linsolve}/psb_skrylov.f90 (100%)
rename {krylov => linsolve}/psb_srgmres.f90 (100%)
rename {krylov => linsolve}/psb_srichardson.f90 (100%)
rename krylov/psb_z_krylov_conv_mod.f90 => linsolve/psb_z_linsolve_conv_mod.f90 (98%)
rename {krylov => linsolve}/psb_zbicg.f90 (100%)
rename {krylov => linsolve}/psb_zcg.F90 (100%)
rename {krylov => linsolve}/psb_zcgs.f90 (100%)
rename {krylov => linsolve}/psb_zcgstab.f90 (100%)
rename {krylov => linsolve}/psb_zcgstabl.f90 (100%)
rename {krylov => linsolve}/psb_zfcg.F90 (100%)
rename {krylov => linsolve}/psb_zgcr.f90 (100%)
rename {krylov => linsolve}/psb_zkrylov.f90 (100%)
rename {krylov => linsolve}/psb_zrgmres.f90 (100%)
rename {krylov => linsolve}/psb_zrichardson.f90 (100%)
diff --git a/Makefile b/Makefile
index 972fd3c6..18970b8f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
include Make.inc
-all: dirs based precd kryld utild cbindd extd $(CUDAD) $(OACCD) libd
+all: dirs based precd linslvd utild cbindd extd $(CUDAD) $(OACCD) libd
@echo "====================================="
@echo "PSBLAS libraries Compilation Successful."
@@ -11,16 +11,16 @@ dirs:
precd: based
utild: based
-kryld: precd
+linslvd: precd
extd: based
cudad: extd
oaccd: extd
-cbindd: based precd kryld utild
+cbindd: based precd linslvd utild
-libd: based precd kryld utild cbindd extd $(CUDALD) $(OACCLD)
+libd: based precd linslvd utild cbindd extd $(CUDALD) $(OACCLD)
$(MAKE) -C base lib
$(MAKE) -C prec lib
- $(MAKE) -C krylov lib
+ $(MAKE) -C linsolve lib
$(MAKE) -C util lib
$(MAKE) -C cbind lib
$(MAKE) -C ext lib
@@ -34,8 +34,8 @@ based:
$(MAKE) -C base objs
precd:
$(MAKE) -C prec objs
-kryld:
- $(MAKE) -C krylov objs
+linslvd:
+ $(MAKE) -C linsolve objs
utild:
$(MAKE) -C util objs
cbindd:
@@ -67,7 +67,7 @@ install: all
clean:
$(MAKE) -C base clean
$(MAKE) -C prec clean
- $(MAKE) -C krylov clean
+ $(MAKE) -C linsolve clean
$(MAKE) -C util clean
$(MAKE) -C cbind clean
$(MAKE) -C ext clean
@@ -85,7 +85,7 @@ cleanlib:
veryclean: cleanlib
cd base && $(MAKE) veryclean
cd prec && $(MAKE) veryclean
- cd krylov && $(MAKE) veryclean
+ cd linsolve && $(MAKE) veryclean
cd util && $(MAKE) veryclean
cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
diff --git a/krylov/Makefile b/linsolve/Makefile
similarity index 73%
rename from krylov/Makefile
rename to linsolve/Makefile
index c08d6740..7ce12020 100644
--- a/krylov/Makefile
+++ b/linsolve/Makefile
@@ -6,9 +6,9 @@ LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
-MODOBJS= psb_base_krylov_conv_mod.o \
- psb_s_krylov_conv_mod.o psb_c_krylov_conv_mod.o \
- psb_d_krylov_conv_mod.o psb_z_krylov_conv_mod.o \
+MODOBJS= psb_base_linsolve_conv_mod.o \
+ psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o \
+ psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o \
psb_krylov_mod.o
F90OBJS=psb_dkrylov.o psb_skrylov.o psb_ckrylov.o psb_zkrylov.o \
psb_drichardson.o psb_srichardson.o psb_crichardson.o psb_zrichardson.o \
@@ -36,8 +36,8 @@ lib: objs
/bin/cp -p $(CPUPDFLAG) $(HERE)/$(LIBNAME) $(LIBDIR)
-psb_s_krylov_conv_mod.o psb_c_krylov_conv_mod.o psb_d_krylov_conv_mod.o psb_z_krylov_conv_mod.o: psb_base_krylov_conv_mod.o
-psb_krylov_conv_mod.o: psb_s_krylov_conv_mod.o psb_c_krylov_conv_mod.o psb_d_krylov_conv_mod.o psb_z_krylov_conv_mod.o
+psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o: psb_base_linsolve_conv_mod.o
+psb_linsolve_conv_mod.o: psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o
$(F90OBJS): $(MODOBJS)
$(OBJS): $(MODDIR)/$(PRECMODNAME)$(.mod) $(MODDIR)/$(BASEMODNAME)$(.mod)
diff --git a/krylov/psb_base_krylov_conv_mod.f90 b/linsolve/psb_base_linsolve_conv_mod.f90
similarity index 97%
rename from krylov/psb_base_krylov_conv_mod.f90
rename to linsolve/psb_base_linsolve_conv_mod.f90
index be7723f5..8d36fb53 100644
--- a/krylov/psb_base_krylov_conv_mod.f90
+++ b/linsolve/psb_base_linsolve_conv_mod.f90
@@ -30,10 +30,10 @@
!
!
!
-! File: psb_krylov_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! File: psb_base_linsolve_mod.f90
+! Interfaces for linear solvers' convergence handling.
!
-Module psb_base_krylov_conv_mod
+Module psb_base_linsolve_conv_mod
use psb_const_mod
@@ -166,4 +166,4 @@ contains
end subroutine psb_d_end_conv
-end module psb_base_krylov_conv_mod
+end module psb_base_linsolve_conv_mod
diff --git a/krylov/psb_c_krylov_conv_mod.f90 b/linsolve/psb_c_linsolve_conv_mod.f90
similarity index 98%
rename from krylov/psb_c_krylov_conv_mod.f90
rename to linsolve/psb_c_linsolve_conv_mod.f90
index 85a2bca7..317dda30 100644
--- a/krylov/psb_c_krylov_conv_mod.f90
+++ b/linsolve/psb_c_linsolve_conv_mod.f90
@@ -30,12 +30,12 @@
!
!
!
-! File: psb_krylov_conv_mod.f90
+! File: psb_linsolve_conv_mod.f90
! Interfaces for Krylov subspace iterative methods.
!
-Module psb_c_krylov_conv_mod
+Module psb_c_linsolve_conv_mod
- use psb_base_krylov_conv_mod
+ use psb_base_linsolve_conv_mod
interface psb_init_conv
module procedure psb_c_init_conv, psb_c_init_conv_vect
@@ -355,4 +355,4 @@ contains
end function psb_c_check_conv_vect
-end module psb_c_krylov_conv_mod
+end module psb_c_linsolve_conv_mod
diff --git a/krylov/psb_cbicg.f90 b/linsolve/psb_cbicg.f90
similarity index 100%
rename from krylov/psb_cbicg.f90
rename to linsolve/psb_cbicg.f90
diff --git a/krylov/psb_ccg.F90 b/linsolve/psb_ccg.F90
similarity index 100%
rename from krylov/psb_ccg.F90
rename to linsolve/psb_ccg.F90
diff --git a/krylov/psb_ccgs.f90 b/linsolve/psb_ccgs.f90
similarity index 100%
rename from krylov/psb_ccgs.f90
rename to linsolve/psb_ccgs.f90
diff --git a/krylov/psb_ccgstab.f90 b/linsolve/psb_ccgstab.f90
similarity index 100%
rename from krylov/psb_ccgstab.f90
rename to linsolve/psb_ccgstab.f90
diff --git a/krylov/psb_ccgstabl.f90 b/linsolve/psb_ccgstabl.f90
similarity index 100%
rename from krylov/psb_ccgstabl.f90
rename to linsolve/psb_ccgstabl.f90
diff --git a/krylov/psb_cfcg.F90 b/linsolve/psb_cfcg.F90
similarity index 100%
rename from krylov/psb_cfcg.F90
rename to linsolve/psb_cfcg.F90
diff --git a/krylov/psb_cgcr.f90 b/linsolve/psb_cgcr.f90
similarity index 100%
rename from krylov/psb_cgcr.f90
rename to linsolve/psb_cgcr.f90
diff --git a/krylov/psb_ckrylov.f90 b/linsolve/psb_ckrylov.f90
similarity index 100%
rename from krylov/psb_ckrylov.f90
rename to linsolve/psb_ckrylov.f90
diff --git a/krylov/psb_crgmres.f90 b/linsolve/psb_crgmres.f90
similarity index 100%
rename from krylov/psb_crgmres.f90
rename to linsolve/psb_crgmres.f90
diff --git a/krylov/psb_crichardson.f90 b/linsolve/psb_crichardson.f90
similarity index 100%
rename from krylov/psb_crichardson.f90
rename to linsolve/psb_crichardson.f90
diff --git a/krylov/psb_d_krylov_conv_mod.f90 b/linsolve/psb_d_linsolve_conv_mod.f90
similarity index 98%
rename from krylov/psb_d_krylov_conv_mod.f90
rename to linsolve/psb_d_linsolve_conv_mod.f90
index 4f9b9f2e..7af5f834 100644
--- a/krylov/psb_d_krylov_conv_mod.f90
+++ b/linsolve/psb_d_linsolve_conv_mod.f90
@@ -30,12 +30,12 @@
!
!
!
-! File: psb_krylov_conv_mod.f90
+! File: psb_linsolve_conv_mod.f90
! Interfaces for Krylov subspace iterative methods.
!
-Module psb_d_krylov_conv_mod
+Module psb_d_linsolve_conv_mod
- use psb_base_krylov_conv_mod
+ use psb_base_linsolve_conv_mod
interface psb_init_conv
module procedure psb_d_init_conv, psb_d_init_conv_vect
@@ -355,4 +355,4 @@ contains
end function psb_d_check_conv_vect
-end module psb_d_krylov_conv_mod
+end module psb_d_linsolve_conv_mod
diff --git a/krylov/psb_dbicg.f90 b/linsolve/psb_dbicg.f90
similarity index 100%
rename from krylov/psb_dbicg.f90
rename to linsolve/psb_dbicg.f90
diff --git a/krylov/psb_dcg.F90 b/linsolve/psb_dcg.F90
similarity index 100%
rename from krylov/psb_dcg.F90
rename to linsolve/psb_dcg.F90
diff --git a/krylov/psb_dcgs.f90 b/linsolve/psb_dcgs.f90
similarity index 100%
rename from krylov/psb_dcgs.f90
rename to linsolve/psb_dcgs.f90
diff --git a/krylov/psb_dcgstab.f90 b/linsolve/psb_dcgstab.f90
similarity index 100%
rename from krylov/psb_dcgstab.f90
rename to linsolve/psb_dcgstab.f90
diff --git a/krylov/psb_dcgstabl.f90 b/linsolve/psb_dcgstabl.f90
similarity index 100%
rename from krylov/psb_dcgstabl.f90
rename to linsolve/psb_dcgstabl.f90
diff --git a/krylov/psb_dfcg.F90 b/linsolve/psb_dfcg.F90
similarity index 100%
rename from krylov/psb_dfcg.F90
rename to linsolve/psb_dfcg.F90
diff --git a/krylov/psb_dgcr.f90 b/linsolve/psb_dgcr.f90
similarity index 100%
rename from krylov/psb_dgcr.f90
rename to linsolve/psb_dgcr.f90
diff --git a/krylov/psb_dkrylov.f90 b/linsolve/psb_dkrylov.f90
similarity index 100%
rename from krylov/psb_dkrylov.f90
rename to linsolve/psb_dkrylov.f90
diff --git a/krylov/psb_drgmres.f90 b/linsolve/psb_drgmres.f90
similarity index 100%
rename from krylov/psb_drgmres.f90
rename to linsolve/psb_drgmres.f90
diff --git a/krylov/psb_drichardson.f90 b/linsolve/psb_drichardson.f90
similarity index 100%
rename from krylov/psb_drichardson.f90
rename to linsolve/psb_drichardson.f90
diff --git a/krylov/psb_krylov_mod.f90 b/linsolve/psb_krylov_mod.f90
similarity index 100%
rename from krylov/psb_krylov_mod.f90
rename to linsolve/psb_krylov_mod.f90
diff --git a/krylov/psb_krylov_conv_mod.f90 b/linsolve/psb_linsolve_conv_mod.f90
similarity index 86%
rename from krylov/psb_krylov_conv_mod.f90
rename to linsolve/psb_linsolve_conv_mod.f90
index 2ec83178..23d6c84c 100644
--- a/krylov/psb_krylov_conv_mod.f90
+++ b/linsolve/psb_linsolve_conv_mod.f90
@@ -30,12 +30,12 @@
!
!
!
-! File: psb_krylov_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! File: psb_linsolve_conv_mod.f90
+! Interfaces for linear solvers' convergence handling.
!
-module psb_krylov_conv_mod
- use psb_s_krylov_conv_mod
- use psb_d_krylov_conv_mod
- use psb_c_krylov_conv_mod
- use psb_z_krylov_conv_mod
-end module psb_krylov_conv_mod
+module psb_linsolve_conv_mod
+ use psb_s_linsolve_conv_mod
+ use psb_d_linsolve_conv_mod
+ use psb_c_linsolve_conv_mod
+ use psb_z_linsolve_conv_mod
+end module psb_linsolve_conv_mod
diff --git a/krylov/psb_s_krylov_conv_mod.f90 b/linsolve/psb_s_linsolve_conv_mod.f90
similarity index 98%
rename from krylov/psb_s_krylov_conv_mod.f90
rename to linsolve/psb_s_linsolve_conv_mod.f90
index 29713c37..d68f6a90 100644
--- a/krylov/psb_s_krylov_conv_mod.f90
+++ b/linsolve/psb_s_linsolve_conv_mod.f90
@@ -30,12 +30,12 @@
!
!
!
-! File: psb_krylov_conv_mod.f90
+! File: psb_linsolve_conv_mod.f90
! Interfaces for Krylov subspace iterative methods.
!
-Module psb_s_krylov_conv_mod
+Module psb_s_linsolve_conv_mod
- use psb_base_krylov_conv_mod
+ use psb_base_linsolve_conv_mod
interface psb_init_conv
module procedure psb_s_init_conv, psb_s_init_conv_vect
@@ -355,4 +355,4 @@ contains
end function psb_s_check_conv_vect
-end module psb_s_krylov_conv_mod
+end module psb_s_linsolve_conv_mod
diff --git a/krylov/psb_sbicg.f90 b/linsolve/psb_sbicg.f90
similarity index 100%
rename from krylov/psb_sbicg.f90
rename to linsolve/psb_sbicg.f90
diff --git a/krylov/psb_scg.F90 b/linsolve/psb_scg.F90
similarity index 100%
rename from krylov/psb_scg.F90
rename to linsolve/psb_scg.F90
diff --git a/krylov/psb_scgs.f90 b/linsolve/psb_scgs.f90
similarity index 100%
rename from krylov/psb_scgs.f90
rename to linsolve/psb_scgs.f90
diff --git a/krylov/psb_scgstab.f90 b/linsolve/psb_scgstab.f90
similarity index 100%
rename from krylov/psb_scgstab.f90
rename to linsolve/psb_scgstab.f90
diff --git a/krylov/psb_scgstabl.f90 b/linsolve/psb_scgstabl.f90
similarity index 100%
rename from krylov/psb_scgstabl.f90
rename to linsolve/psb_scgstabl.f90
diff --git a/krylov/psb_sfcg.F90 b/linsolve/psb_sfcg.F90
similarity index 100%
rename from krylov/psb_sfcg.F90
rename to linsolve/psb_sfcg.F90
diff --git a/krylov/psb_sgcr.f90 b/linsolve/psb_sgcr.f90
similarity index 100%
rename from krylov/psb_sgcr.f90
rename to linsolve/psb_sgcr.f90
diff --git a/krylov/psb_skrylov.f90 b/linsolve/psb_skrylov.f90
similarity index 100%
rename from krylov/psb_skrylov.f90
rename to linsolve/psb_skrylov.f90
diff --git a/krylov/psb_srgmres.f90 b/linsolve/psb_srgmres.f90
similarity index 100%
rename from krylov/psb_srgmres.f90
rename to linsolve/psb_srgmres.f90
diff --git a/krylov/psb_srichardson.f90 b/linsolve/psb_srichardson.f90
similarity index 100%
rename from krylov/psb_srichardson.f90
rename to linsolve/psb_srichardson.f90
diff --git a/krylov/psb_z_krylov_conv_mod.f90 b/linsolve/psb_z_linsolve_conv_mod.f90
similarity index 98%
rename from krylov/psb_z_krylov_conv_mod.f90
rename to linsolve/psb_z_linsolve_conv_mod.f90
index fc88ccf6..a842a5b0 100644
--- a/krylov/psb_z_krylov_conv_mod.f90
+++ b/linsolve/psb_z_linsolve_conv_mod.f90
@@ -30,12 +30,12 @@
!
!
!
-! File: psb_krylov_conv_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! File: psb_linsolve_conv_mod.f90
+! Interfaces for linear solvers.
!
-Module psb_z_krylov_conv_mod
+Module psb_z_linsolve_conv_mod
- use psb_base_krylov_conv_mod
+ use psb_base_linsolve_conv_mod
interface psb_init_conv
module procedure psb_z_init_conv, psb_z_init_conv_vect
@@ -355,4 +355,4 @@ contains
end function psb_z_check_conv_vect
-end module psb_z_krylov_conv_mod
+end module psb_z_linsolve_conv_mod
diff --git a/krylov/psb_zbicg.f90 b/linsolve/psb_zbicg.f90
similarity index 100%
rename from krylov/psb_zbicg.f90
rename to linsolve/psb_zbicg.f90
diff --git a/krylov/psb_zcg.F90 b/linsolve/psb_zcg.F90
similarity index 100%
rename from krylov/psb_zcg.F90
rename to linsolve/psb_zcg.F90
diff --git a/krylov/psb_zcgs.f90 b/linsolve/psb_zcgs.f90
similarity index 100%
rename from krylov/psb_zcgs.f90
rename to linsolve/psb_zcgs.f90
diff --git a/krylov/psb_zcgstab.f90 b/linsolve/psb_zcgstab.f90
similarity index 100%
rename from krylov/psb_zcgstab.f90
rename to linsolve/psb_zcgstab.f90
diff --git a/krylov/psb_zcgstabl.f90 b/linsolve/psb_zcgstabl.f90
similarity index 100%
rename from krylov/psb_zcgstabl.f90
rename to linsolve/psb_zcgstabl.f90
diff --git a/krylov/psb_zfcg.F90 b/linsolve/psb_zfcg.F90
similarity index 100%
rename from krylov/psb_zfcg.F90
rename to linsolve/psb_zfcg.F90
diff --git a/krylov/psb_zgcr.f90 b/linsolve/psb_zgcr.f90
similarity index 100%
rename from krylov/psb_zgcr.f90
rename to linsolve/psb_zgcr.f90
diff --git a/krylov/psb_zkrylov.f90 b/linsolve/psb_zkrylov.f90
similarity index 100%
rename from krylov/psb_zkrylov.f90
rename to linsolve/psb_zkrylov.f90
diff --git a/krylov/psb_zrgmres.f90 b/linsolve/psb_zrgmres.f90
similarity index 100%
rename from krylov/psb_zrgmres.f90
rename to linsolve/psb_zrgmres.f90
diff --git a/krylov/psb_zrichardson.f90 b/linsolve/psb_zrichardson.f90
similarity index 100%
rename from krylov/psb_zrichardson.f90
rename to linsolve/psb_zrichardson.f90
From ea8c526bf27341451e6f4180d149fa7a4f4b6318 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Sun, 10 Nov 2024 10:12:31 +0100
Subject: [PATCH 60/86] Rename krylov into linsolve step 2.
---
linsolve/psb_c_linsolve_conv_mod.f90 | 2 +-
linsolve/psb_d_linsolve_conv_mod.f90 | 2 +-
linsolve/psb_s_linsolve_conv_mod.f90 | 2 +-
linsolve/psb_z_linsolve_conv_mod.f90 | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/linsolve/psb_c_linsolve_conv_mod.f90 b/linsolve/psb_c_linsolve_conv_mod.f90
index 317dda30..73db081e 100644
--- a/linsolve/psb_c_linsolve_conv_mod.f90
+++ b/linsolve/psb_c_linsolve_conv_mod.f90
@@ -31,7 +31,7 @@
!
!
! File: psb_linsolve_conv_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! Interfaces for linear solvers' convergence handling.
!
Module psb_c_linsolve_conv_mod
diff --git a/linsolve/psb_d_linsolve_conv_mod.f90 b/linsolve/psb_d_linsolve_conv_mod.f90
index 7af5f834..ba59923f 100644
--- a/linsolve/psb_d_linsolve_conv_mod.f90
+++ b/linsolve/psb_d_linsolve_conv_mod.f90
@@ -31,7 +31,7 @@
!
!
! File: psb_linsolve_conv_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! Interfaces for linear solvers' convergence handling.
!
Module psb_d_linsolve_conv_mod
diff --git a/linsolve/psb_s_linsolve_conv_mod.f90 b/linsolve/psb_s_linsolve_conv_mod.f90
index d68f6a90..5de6ebb6 100644
--- a/linsolve/psb_s_linsolve_conv_mod.f90
+++ b/linsolve/psb_s_linsolve_conv_mod.f90
@@ -31,7 +31,7 @@
!
!
! File: psb_linsolve_conv_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! Interfaces for linear solvers' convergence handling.
!
Module psb_s_linsolve_conv_mod
diff --git a/linsolve/psb_z_linsolve_conv_mod.f90 b/linsolve/psb_z_linsolve_conv_mod.f90
index a842a5b0..6c6276ff 100644
--- a/linsolve/psb_z_linsolve_conv_mod.f90
+++ b/linsolve/psb_z_linsolve_conv_mod.f90
@@ -31,7 +31,7 @@
!
!
! File: psb_linsolve_conv_mod.f90
-! Interfaces for linear solvers.
+! Interfaces for linear solvers' convergence handling.
!
Module psb_z_linsolve_conv_mod
From 14dce3eefd656fbed38f7e1bfa18dca2af7e6ec3 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Sun, 10 Nov 2024 10:27:31 +0100
Subject: [PATCH 61/86] Krylov into linsolve, step 3.
---
linsolve/psb_cbicg.f90 | 2 +-
linsolve/psb_ccg.F90 | 2 +-
linsolve/psb_ccgs.f90 | 2 +-
linsolve/psb_ccgstab.f90 | 2 +-
linsolve/psb_ccgstabl.f90 | 2 +-
linsolve/psb_cfcg.F90 | 2 +-
linsolve/psb_cgcr.f90 | 2 +-
linsolve/psb_crgmres.f90 | 2 +-
linsolve/psb_crichardson.f90 | 2 +-
linsolve/psb_dbicg.f90 | 2 +-
linsolve/psb_dcg.F90 | 2 +-
linsolve/psb_dcgs.f90 | 2 +-
linsolve/psb_dcgstab.f90 | 2 +-
linsolve/psb_dcgstabl.f90 | 2 +-
linsolve/psb_dfcg.F90 | 2 +-
linsolve/psb_dgcr.f90 | 2 +-
linsolve/psb_drgmres.f90 | 2 +-
linsolve/psb_drichardson.f90 | 2 +-
linsolve/psb_sbicg.f90 | 2 +-
linsolve/psb_scg.F90 | 2 +-
linsolve/psb_scgs.f90 | 2 +-
linsolve/psb_scgstab.f90 | 2 +-
linsolve/psb_scgstabl.f90 | 2 +-
linsolve/psb_sfcg.F90 | 2 +-
linsolve/psb_sgcr.f90 | 2 +-
linsolve/psb_srgmres.f90 | 2 +-
linsolve/psb_srichardson.f90 | 2 +-
linsolve/psb_zbicg.f90 | 2 +-
linsolve/psb_zcg.F90 | 2 +-
linsolve/psb_zcgs.f90 | 2 +-
linsolve/psb_zcgstab.f90 | 2 +-
linsolve/psb_zcgstabl.f90 | 2 +-
linsolve/psb_zfcg.F90 | 2 +-
linsolve/psb_zgcr.f90 | 2 +-
linsolve/psb_zrgmres.f90 | 2 +-
linsolve/psb_zrichardson.f90 | 2 +-
36 files changed, 36 insertions(+), 36 deletions(-)
diff --git a/linsolve/psb_cbicg.f90 b/linsolve/psb_cbicg.f90
index c3b4f472..7a59fac5 100644
--- a/linsolve/psb_cbicg.f90
+++ b/linsolve/psb_cbicg.f90
@@ -98,7 +98,7 @@ subroutine psb_cbicg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_ccg.F90 b/linsolve/psb_ccg.F90
index fbc550e7..45455edb 100644
--- a/linsolve/psb_ccg.F90
+++ b/linsolve/psb_ccg.F90
@@ -99,7 +99,7 @@ subroutine psb_ccg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_ccgs.f90 b/linsolve/psb_ccgs.f90
index c25a449a..c4cb9b8c 100644
--- a/linsolve/psb_ccgs.f90
+++ b/linsolve/psb_ccgs.f90
@@ -96,7 +96,7 @@ Subroutine psb_ccgs_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_ccgstab.f90 b/linsolve/psb_ccgstab.f90
index 22d73b85..2acfaadd 100644
--- a/linsolve/psb_ccgstab.f90
+++ b/linsolve/psb_ccgstab.f90
@@ -96,7 +96,7 @@
Subroutine psb_ccgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_ccgstabl.f90 b/linsolve/psb_ccgstabl.f90
index 86ca5d93..64fb2441 100644
--- a/linsolve/psb_ccgstabl.f90
+++ b/linsolve/psb_ccgstabl.f90
@@ -107,7 +107,7 @@ Subroutine psb_ccgstabl_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_cfcg.F90 b/linsolve/psb_cfcg.F90
index 590ed29d..bdb5207a 100644
--- a/linsolve/psb_cfcg.F90
+++ b/linsolve/psb_cfcg.F90
@@ -107,7 +107,7 @@ subroutine psb_cfcg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_cgcr.f90 b/linsolve/psb_cgcr.f90
index 91c848a2..f2ee9037 100644
--- a/linsolve/psb_cgcr.f90
+++ b/linsolve/psb_cgcr.f90
@@ -109,7 +109,7 @@ subroutine psb_cgcr_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace, irst, istop)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
diff --git a/linsolve/psb_crgmres.f90 b/linsolve/psb_crgmres.f90
index 80aa34c3..39b8ae18 100644
--- a/linsolve/psb_crgmres.f90
+++ b/linsolve/psb_crgmres.f90
@@ -110,7 +110,7 @@ subroutine psb_crgmres_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
diff --git a/linsolve/psb_crichardson.f90 b/linsolve/psb_crichardson.f90
index 08678653..e47e18a7 100644
--- a/linsolve/psb_crichardson.f90
+++ b/linsolve/psb_crichardson.f90
@@ -73,7 +73,7 @@ Subroutine psb_crichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
- use psb_c_krylov_conv_mod
+ use psb_c_linsolve_conv_mod
use psb_krylov_mod, psb_protect_name => psb_crichardson_vect
Type(psb_cspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_dbicg.f90 b/linsolve/psb_dbicg.f90
index 5ac94d3c..6b1c260b 100644
--- a/linsolve/psb_dbicg.f90
+++ b/linsolve/psb_dbicg.f90
@@ -98,7 +98,7 @@ subroutine psb_dbicg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_dcg.F90 b/linsolve/psb_dcg.F90
index 669573be..edc23870 100644
--- a/linsolve/psb_dcg.F90
+++ b/linsolve/psb_dcg.F90
@@ -99,7 +99,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_dcgs.f90 b/linsolve/psb_dcgs.f90
index 78a3905c..6dc9385a 100644
--- a/linsolve/psb_dcgs.f90
+++ b/linsolve/psb_dcgs.f90
@@ -96,7 +96,7 @@ Subroutine psb_dcgs_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_dcgstab.f90 b/linsolve/psb_dcgstab.f90
index bec3329a..91d66f87 100644
--- a/linsolve/psb_dcgstab.f90
+++ b/linsolve/psb_dcgstab.f90
@@ -96,7 +96,7 @@
Subroutine psb_dcgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_dcgstabl.f90 b/linsolve/psb_dcgstabl.f90
index 01641226..b51e2aa8 100644
--- a/linsolve/psb_dcgstabl.f90
+++ b/linsolve/psb_dcgstabl.f90
@@ -107,7 +107,7 @@ Subroutine psb_dcgstabl_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_dfcg.F90 b/linsolve/psb_dfcg.F90
index d3b2c9d2..6eba5762 100644
--- a/linsolve/psb_dfcg.F90
+++ b/linsolve/psb_dfcg.F90
@@ -107,7 +107,7 @@ subroutine psb_dfcg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_dgcr.f90 b/linsolve/psb_dgcr.f90
index b7480f84..5566209f 100644
--- a/linsolve/psb_dgcr.f90
+++ b/linsolve/psb_dgcr.f90
@@ -109,7 +109,7 @@ subroutine psb_dgcr_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace, irst, istop)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
diff --git a/linsolve/psb_drgmres.f90 b/linsolve/psb_drgmres.f90
index 1503748a..9a8690dd 100644
--- a/linsolve/psb_drgmres.f90
+++ b/linsolve/psb_drgmres.f90
@@ -110,7 +110,7 @@ subroutine psb_drgmres_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
diff --git a/linsolve/psb_drichardson.f90 b/linsolve/psb_drichardson.f90
index 2c057320..f7c72454 100644
--- a/linsolve/psb_drichardson.f90
+++ b/linsolve/psb_drichardson.f90
@@ -73,7 +73,7 @@ Subroutine psb_drichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
- use psb_d_krylov_conv_mod
+ use psb_d_linsolve_conv_mod
use psb_krylov_mod, psb_protect_name => psb_drichardson_vect
Type(psb_dspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_sbicg.f90 b/linsolve/psb_sbicg.f90
index 609d3a5f..f8074dec 100644
--- a/linsolve/psb_sbicg.f90
+++ b/linsolve/psb_sbicg.f90
@@ -98,7 +98,7 @@ subroutine psb_sbicg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_scg.F90 b/linsolve/psb_scg.F90
index c16dbf6a..f6276348 100644
--- a/linsolve/psb_scg.F90
+++ b/linsolve/psb_scg.F90
@@ -99,7 +99,7 @@ subroutine psb_scg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_scgs.f90 b/linsolve/psb_scgs.f90
index 48fe5372..c28026d5 100644
--- a/linsolve/psb_scgs.f90
+++ b/linsolve/psb_scgs.f90
@@ -96,7 +96,7 @@ Subroutine psb_scgs_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_scgstab.f90 b/linsolve/psb_scgstab.f90
index 2a811b8d..89f5244b 100644
--- a/linsolve/psb_scgstab.f90
+++ b/linsolve/psb_scgstab.f90
@@ -96,7 +96,7 @@
Subroutine psb_scgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_scgstabl.f90 b/linsolve/psb_scgstabl.f90
index c2fc9833..0dca7720 100644
--- a/linsolve/psb_scgstabl.f90
+++ b/linsolve/psb_scgstabl.f90
@@ -107,7 +107,7 @@ Subroutine psb_scgstabl_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_sfcg.F90 b/linsolve/psb_sfcg.F90
index 3a518bb2..c39faf5e 100644
--- a/linsolve/psb_sfcg.F90
+++ b/linsolve/psb_sfcg.F90
@@ -107,7 +107,7 @@ subroutine psb_sfcg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_sgcr.f90 b/linsolve/psb_sgcr.f90
index dd0aca16..c0d4f1cd 100644
--- a/linsolve/psb_sgcr.f90
+++ b/linsolve/psb_sgcr.f90
@@ -109,7 +109,7 @@ subroutine psb_sgcr_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace, irst, istop)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
diff --git a/linsolve/psb_srgmres.f90 b/linsolve/psb_srgmres.f90
index 02836dd7..757e7eb7 100644
--- a/linsolve/psb_srgmres.f90
+++ b/linsolve/psb_srgmres.f90
@@ -110,7 +110,7 @@ subroutine psb_srgmres_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
diff --git a/linsolve/psb_srichardson.f90 b/linsolve/psb_srichardson.f90
index a06f6cb4..d3a47b97 100644
--- a/linsolve/psb_srichardson.f90
+++ b/linsolve/psb_srichardson.f90
@@ -73,7 +73,7 @@ Subroutine psb_srichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
- use psb_s_krylov_conv_mod
+ use psb_s_linsolve_conv_mod
use psb_krylov_mod, psb_protect_name => psb_srichardson_vect
Type(psb_sspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_zbicg.f90 b/linsolve/psb_zbicg.f90
index c22e499a..4c4f4c32 100644
--- a/linsolve/psb_zbicg.f90
+++ b/linsolve/psb_zbicg.f90
@@ -98,7 +98,7 @@ subroutine psb_zbicg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zcg.F90 b/linsolve/psb_zcg.F90
index a4a521d8..5d6adb4d 100644
--- a/linsolve/psb_zcg.F90
+++ b/linsolve/psb_zcg.F90
@@ -99,7 +99,7 @@ subroutine psb_zcg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zcgs.f90 b/linsolve/psb_zcgs.f90
index 3ccce860..fe307bc6 100644
--- a/linsolve/psb_zcgs.f90
+++ b/linsolve/psb_zcgs.f90
@@ -96,7 +96,7 @@ Subroutine psb_zcgs_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zcgstab.f90 b/linsolve/psb_zcgstab.f90
index 95ff129a..6cc72a28 100644
--- a/linsolve/psb_zcgstab.f90
+++ b/linsolve/psb_zcgstab.f90
@@ -96,7 +96,7 @@
Subroutine psb_zcgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,istop)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zcgstabl.f90 b/linsolve/psb_zcgstabl.f90
index 2cf3a0e5..0898953a 100644
--- a/linsolve/psb_zcgstabl.f90
+++ b/linsolve/psb_zcgstabl.f90
@@ -107,7 +107,7 @@ Subroutine psb_zcgstabl_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zfcg.F90 b/linsolve/psb_zfcg.F90
index 3c26ad3d..e5d6f23c 100644
--- a/linsolve/psb_zfcg.F90
+++ b/linsolve/psb_zfcg.F90
@@ -107,7 +107,7 @@ subroutine psb_zfcg_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,istop,cond)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zgcr.f90 b/linsolve/psb_zgcr.f90
index 2399160c..30eb7db2 100644
--- a/linsolve/psb_zgcr.f90
+++ b/linsolve/psb_zgcr.f90
@@ -109,7 +109,7 @@ subroutine psb_zgcr_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace, irst, istop)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
diff --git a/linsolve/psb_zrgmres.f90 b/linsolve/psb_zrgmres.f90
index 3aaf0032..2ee8e4dc 100644
--- a/linsolve/psb_zrgmres.f90
+++ b/linsolve/psb_zrgmres.f90
@@ -110,7 +110,7 @@ subroutine psb_zrgmres_vect(a,prec,b,x,eps,desc_a,info,&
& itmax,iter,err,itrace,irst,istop)
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
diff --git a/linsolve/psb_zrichardson.f90 b/linsolve/psb_zrichardson.f90
index 19f2f10e..b4c0af0f 100644
--- a/linsolve/psb_zrichardson.f90
+++ b/linsolve/psb_zrichardson.f90
@@ -73,7 +73,7 @@ Subroutine psb_zrichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
- use psb_z_krylov_conv_mod
+ use psb_z_linsolve_conv_mod
use psb_krylov_mod, psb_protect_name => psb_zrichardson_vect
Type(psb_zspmat_type), Intent(in) :: a
From 98d5db73776e01b4526bf25e334cb6211a72e484 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Sun, 10 Nov 2024 10:47:05 +0100
Subject: [PATCH 62/86] Krylov into linsolve, step 4
---
cbind/krylov/psb_ckrylov_cbind_mod.f90 | 4 ++--
cbind/krylov/psb_dkrylov_cbind_mod.f90 | 4 ++--
cbind/krylov/psb_skrylov_cbind_mod.f90 | 4 ++--
cbind/krylov/psb_zkrylov_cbind_mod.f90 | 4 ++--
linsolve/Makefile | 2 +-
linsolve/psb_cbicg.f90 | 2 +-
linsolve/psb_ccg.F90 | 2 +-
linsolve/psb_ccgs.f90 | 2 +-
linsolve/psb_ccgstab.f90 | 2 +-
linsolve/psb_ccgstabl.f90 | 2 +-
linsolve/psb_cfcg.F90 | 2 +-
linsolve/psb_cgcr.f90 | 2 +-
linsolve/psb_ckrylov.f90 | 2 +-
linsolve/psb_crgmres.f90 | 2 +-
linsolve/psb_crichardson.f90 | 2 +-
linsolve/psb_dbicg.f90 | 2 +-
linsolve/psb_dcg.F90 | 2 +-
linsolve/psb_dcgs.f90 | 2 +-
linsolve/psb_dcgstab.f90 | 2 +-
linsolve/psb_dcgstabl.f90 | 2 +-
linsolve/psb_dfcg.F90 | 2 +-
linsolve/psb_dgcr.f90 | 2 +-
linsolve/psb_dkrylov.f90 | 2 +-
linsolve/psb_drgmres.f90 | 2 +-
linsolve/psb_drichardson.f90 | 2 +-
linsolve/{psb_krylov_mod.f90 => psb_linsolve_mod.f90} | 8 ++++----
linsolve/psb_sbicg.f90 | 2 +-
linsolve/psb_scg.F90 | 2 +-
linsolve/psb_scgs.f90 | 2 +-
linsolve/psb_scgstab.f90 | 2 +-
linsolve/psb_scgstabl.f90 | 2 +-
linsolve/psb_sfcg.F90 | 2 +-
linsolve/psb_sgcr.f90 | 2 +-
linsolve/psb_skrylov.f90 | 2 +-
linsolve/psb_srgmres.f90 | 2 +-
linsolve/psb_srichardson.f90 | 2 +-
linsolve/psb_zbicg.f90 | 2 +-
linsolve/psb_zcg.F90 | 2 +-
linsolve/psb_zcgs.f90 | 2 +-
linsolve/psb_zcgstab.f90 | 2 +-
linsolve/psb_zcgstabl.f90 | 2 +-
linsolve/psb_zfcg.F90 | 2 +-
linsolve/psb_zgcr.f90 | 2 +-
linsolve/psb_zkrylov.f90 | 2 +-
linsolve/psb_zrgmres.f90 | 2 +-
linsolve/psb_zrichardson.f90 | 2 +-
test/fileread/psb_cf_sample.f90 | 2 +-
test/fileread/psb_df_sample.f90 | 2 +-
test/fileread/psb_sf_sample.f90 | 2 +-
test/fileread/psb_zf_sample.f90 | 2 +-
test/pargen/psb_d_pde2d.F90 | 2 +-
test/pargen/psb_d_pde3d.F90 | 2 +-
test/pargen/psb_s_pde2d.F90 | 2 +-
test/pargen/psb_s_pde3d.F90 | 2 +-
54 files changed, 61 insertions(+), 61 deletions(-)
rename linsolve/{psb_krylov_mod.f90 => psb_linsolve_mod.f90} (98%)
diff --git a/cbind/krylov/psb_ckrylov_cbind_mod.f90 b/cbind/krylov/psb_ckrylov_cbind_mod.f90
index 56cd51ab..758ecd02 100644
--- a/cbind/krylov/psb_ckrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_ckrylov_cbind_mod.f90
@@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
diff --git a/cbind/krylov/psb_dkrylov_cbind_mod.f90 b/cbind/krylov/psb_dkrylov_cbind_mod.f90
index 43b3ca8c..b1119067 100644
--- a/cbind/krylov/psb_dkrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_dkrylov_cbind_mod.f90
@@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
diff --git a/cbind/krylov/psb_skrylov_cbind_mod.f90 b/cbind/krylov/psb_skrylov_cbind_mod.f90
index 60d41d14..41bb9506 100644
--- a/cbind/krylov/psb_skrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_skrylov_cbind_mod.f90
@@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
diff --git a/cbind/krylov/psb_zkrylov_cbind_mod.f90 b/cbind/krylov/psb_zkrylov_cbind_mod.f90
index 22e74386..37f24be7 100644
--- a/cbind/krylov/psb_zkrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_zkrylov_cbind_mod.f90
@@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
diff --git a/linsolve/Makefile b/linsolve/Makefile
index 7ce12020..5c6445f5 100644
--- a/linsolve/Makefile
+++ b/linsolve/Makefile
@@ -9,7 +9,7 @@ MODDIR=../modules
MODOBJS= psb_base_linsolve_conv_mod.o \
psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o \
psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o \
- psb_krylov_mod.o
+ psb_linsolve_mod.o
F90OBJS=psb_dkrylov.o psb_skrylov.o psb_ckrylov.o psb_zkrylov.o \
psb_drichardson.o psb_srichardson.o psb_crichardson.o psb_zrichardson.o \
psb_dcgstab.o psb_dcg.o psb_dfcg.o psb_dgcr.o psb_dcgs.o \
diff --git a/linsolve/psb_cbicg.f90 b/linsolve/psb_cbicg.f90
index 7a59fac5..c357054b 100644
--- a/linsolve/psb_cbicg.f90
+++ b/linsolve/psb_cbicg.f90
@@ -99,7 +99,7 @@ subroutine psb_cbicg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
type(psb_desc_type), intent(in) :: desc_a
diff --git a/linsolve/psb_ccg.F90 b/linsolve/psb_ccg.F90
index 45455edb..0ba1b5fc 100644
--- a/linsolve/psb_ccg.F90
+++ b/linsolve/psb_ccg.F90
@@ -100,7 +100,7 @@ subroutine psb_ccg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_ccgs.f90 b/linsolve/psb_ccgs.f90
index c4cb9b8c..f7808c0d 100644
--- a/linsolve/psb_ccgs.f90
+++ b/linsolve/psb_ccgs.f90
@@ -97,7 +97,7 @@ Subroutine psb_ccgs_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_ccgstab.f90 b/linsolve/psb_ccgstab.f90
index 2acfaadd..4ccbaf7a 100644
--- a/linsolve/psb_ccgstab.f90
+++ b/linsolve/psb_ccgstab.f90
@@ -97,7 +97,7 @@ Subroutine psb_ccgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,ist
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
class(psb_cprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_ccgstabl.f90 b/linsolve/psb_ccgstabl.f90
index 64fb2441..f1463b83 100644
--- a/linsolve/psb_ccgstabl.f90
+++ b/linsolve/psb_ccgstabl.f90
@@ -108,7 +108,7 @@ Subroutine psb_ccgstabl_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
class(psb_cprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_cfcg.F90 b/linsolve/psb_cfcg.F90
index bdb5207a..f28db7de 100644
--- a/linsolve/psb_cfcg.F90
+++ b/linsolve/psb_cfcg.F90
@@ -108,7 +108,7 @@ subroutine psb_cfcg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_cgcr.f90 b/linsolve/psb_cgcr.f90
index f2ee9037..5120b102 100644
--- a/linsolve/psb_cgcr.f90
+++ b/linsolve/psb_cgcr.f90
@@ -110,7 +110,7 @@ subroutine psb_cgcr_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
diff --git a/linsolve/psb_ckrylov.f90 b/linsolve/psb_ckrylov.f90
index 3afa525e..39f310fc 100644
--- a/linsolve/psb_ckrylov.f90
+++ b/linsolve/psb_ckrylov.f90
@@ -84,7 +84,7 @@ Subroutine psb_ckrylov_vect(method,a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod,only : psb_cprec_type
- use psb_krylov_mod, psb_protect_name => psb_ckrylov_vect
+ use psb_linsolve_mod, psb_protect_name => psb_ckrylov_vect
character(len=*) :: method
Type(psb_cspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_crgmres.f90 b/linsolve/psb_crgmres.f90
index 39b8ae18..e7799f70 100644
--- a/linsolve/psb_crgmres.f90
+++ b/linsolve/psb_crgmres.f90
@@ -111,7 +111,7 @@ subroutine psb_crgmres_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_cspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_crichardson.f90 b/linsolve/psb_crichardson.f90
index e47e18a7..3eb4c263 100644
--- a/linsolve/psb_crichardson.f90
+++ b/linsolve/psb_crichardson.f90
@@ -74,7 +74,7 @@ Subroutine psb_crichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_c_linsolve_conv_mod
- use psb_krylov_mod, psb_protect_name => psb_crichardson_vect
+ use psb_linsolve_mod, psb_protect_name => psb_crichardson_vect
Type(psb_cspmat_type), Intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_dbicg.f90 b/linsolve/psb_dbicg.f90
index 6b1c260b..ebcc8eb9 100644
--- a/linsolve/psb_dbicg.f90
+++ b/linsolve/psb_dbicg.f90
@@ -99,7 +99,7 @@ subroutine psb_dbicg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
type(psb_desc_type), intent(in) :: desc_a
diff --git a/linsolve/psb_dcg.F90 b/linsolve/psb_dcg.F90
index edc23870..14c978bb 100644
--- a/linsolve/psb_dcg.F90
+++ b/linsolve/psb_dcg.F90
@@ -100,7 +100,7 @@ subroutine psb_dcg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_dcgs.f90 b/linsolve/psb_dcgs.f90
index 6dc9385a..5c2401d1 100644
--- a/linsolve/psb_dcgs.f90
+++ b/linsolve/psb_dcgs.f90
@@ -97,7 +97,7 @@ Subroutine psb_dcgs_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_dcgstab.f90 b/linsolve/psb_dcgstab.f90
index 91d66f87..749015d7 100644
--- a/linsolve/psb_dcgstab.f90
+++ b/linsolve/psb_dcgstab.f90
@@ -97,7 +97,7 @@ Subroutine psb_dcgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,ist
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
class(psb_dprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_dcgstabl.f90 b/linsolve/psb_dcgstabl.f90
index b51e2aa8..a2ae6164 100644
--- a/linsolve/psb_dcgstabl.f90
+++ b/linsolve/psb_dcgstabl.f90
@@ -108,7 +108,7 @@ Subroutine psb_dcgstabl_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
class(psb_dprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_dfcg.F90 b/linsolve/psb_dfcg.F90
index 6eba5762..cb741795 100644
--- a/linsolve/psb_dfcg.F90
+++ b/linsolve/psb_dfcg.F90
@@ -108,7 +108,7 @@ subroutine psb_dfcg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_dgcr.f90 b/linsolve/psb_dgcr.f90
index 5566209f..cf2e2b0e 100644
--- a/linsolve/psb_dgcr.f90
+++ b/linsolve/psb_dgcr.f90
@@ -110,7 +110,7 @@ subroutine psb_dgcr_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
diff --git a/linsolve/psb_dkrylov.f90 b/linsolve/psb_dkrylov.f90
index 2bc24d6a..d858c228 100644
--- a/linsolve/psb_dkrylov.f90
+++ b/linsolve/psb_dkrylov.f90
@@ -84,7 +84,7 @@ Subroutine psb_dkrylov_vect(method,a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod,only : psb_dprec_type
- use psb_krylov_mod, psb_protect_name => psb_dkrylov_vect
+ use psb_linsolve_mod, psb_protect_name => psb_dkrylov_vect
character(len=*) :: method
Type(psb_dspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_drgmres.f90 b/linsolve/psb_drgmres.f90
index 9a8690dd..373c7a70 100644
--- a/linsolve/psb_drgmres.f90
+++ b/linsolve/psb_drgmres.f90
@@ -111,7 +111,7 @@ subroutine psb_drgmres_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_dspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_drichardson.f90 b/linsolve/psb_drichardson.f90
index f7c72454..b0a53a4b 100644
--- a/linsolve/psb_drichardson.f90
+++ b/linsolve/psb_drichardson.f90
@@ -74,7 +74,7 @@ Subroutine psb_drichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_d_linsolve_conv_mod
- use psb_krylov_mod, psb_protect_name => psb_drichardson_vect
+ use psb_linsolve_mod, psb_protect_name => psb_drichardson_vect
Type(psb_dspmat_type), Intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_krylov_mod.f90 b/linsolve/psb_linsolve_mod.f90
similarity index 98%
rename from linsolve/psb_krylov_mod.f90
rename to linsolve/psb_linsolve_mod.f90
index e9a94e18..c54326f3 100644
--- a/linsolve/psb_krylov_mod.f90
+++ b/linsolve/psb_linsolve_mod.f90
@@ -30,10 +30,10 @@
!
!
!
-! File: psb_krylov_mod.f90
-! Interfaces for Krylov subspace iterative methods.
+! File: psb_linsolve_mod.f90
+! Interfaces for linear solvers.
!
-Module psb_krylov_mod
+Module psb_linsolve_mod
use psb_const_mod
public
@@ -211,4 +211,4 @@ Module psb_krylov_mod
end interface
-end module psb_krylov_mod
+end module psb_linsolve_mod
diff --git a/linsolve/psb_sbicg.f90 b/linsolve/psb_sbicg.f90
index f8074dec..1b5f1fb8 100644
--- a/linsolve/psb_sbicg.f90
+++ b/linsolve/psb_sbicg.f90
@@ -99,7 +99,7 @@ subroutine psb_sbicg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
type(psb_desc_type), intent(in) :: desc_a
diff --git a/linsolve/psb_scg.F90 b/linsolve/psb_scg.F90
index f6276348..d3485db5 100644
--- a/linsolve/psb_scg.F90
+++ b/linsolve/psb_scg.F90
@@ -100,7 +100,7 @@ subroutine psb_scg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_scgs.f90 b/linsolve/psb_scgs.f90
index c28026d5..4034283e 100644
--- a/linsolve/psb_scgs.f90
+++ b/linsolve/psb_scgs.f90
@@ -97,7 +97,7 @@ Subroutine psb_scgs_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_scgstab.f90 b/linsolve/psb_scgstab.f90
index 89f5244b..a0b57fa8 100644
--- a/linsolve/psb_scgstab.f90
+++ b/linsolve/psb_scgstab.f90
@@ -97,7 +97,7 @@ Subroutine psb_scgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,ist
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
class(psb_sprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_scgstabl.f90 b/linsolve/psb_scgstabl.f90
index 0dca7720..e55d2746 100644
--- a/linsolve/psb_scgstabl.f90
+++ b/linsolve/psb_scgstabl.f90
@@ -108,7 +108,7 @@ Subroutine psb_scgstabl_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
class(psb_sprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_sfcg.F90 b/linsolve/psb_sfcg.F90
index c39faf5e..449942cd 100644
--- a/linsolve/psb_sfcg.F90
+++ b/linsolve/psb_sfcg.F90
@@ -108,7 +108,7 @@ subroutine psb_sfcg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_sgcr.f90 b/linsolve/psb_sgcr.f90
index c0d4f1cd..b2d09d3e 100644
--- a/linsolve/psb_sgcr.f90
+++ b/linsolve/psb_sgcr.f90
@@ -110,7 +110,7 @@ subroutine psb_sgcr_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
diff --git a/linsolve/psb_skrylov.f90 b/linsolve/psb_skrylov.f90
index 35d2024f..1a612a12 100644
--- a/linsolve/psb_skrylov.f90
+++ b/linsolve/psb_skrylov.f90
@@ -84,7 +84,7 @@ Subroutine psb_skrylov_vect(method,a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod,only : psb_sprec_type
- use psb_krylov_mod, psb_protect_name => psb_skrylov_vect
+ use psb_linsolve_mod, psb_protect_name => psb_skrylov_vect
character(len=*) :: method
Type(psb_sspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_srgmres.f90 b/linsolve/psb_srgmres.f90
index 757e7eb7..537d2f00 100644
--- a/linsolve/psb_srgmres.f90
+++ b/linsolve/psb_srgmres.f90
@@ -111,7 +111,7 @@ subroutine psb_srgmres_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_sspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_srichardson.f90 b/linsolve/psb_srichardson.f90
index d3a47b97..660778fc 100644
--- a/linsolve/psb_srichardson.f90
+++ b/linsolve/psb_srichardson.f90
@@ -74,7 +74,7 @@ Subroutine psb_srichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_s_linsolve_conv_mod
- use psb_krylov_mod, psb_protect_name => psb_srichardson_vect
+ use psb_linsolve_mod, psb_protect_name => psb_srichardson_vect
Type(psb_sspmat_type), Intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_zbicg.f90 b/linsolve/psb_zbicg.f90
index 4c4f4c32..6a3e0215 100644
--- a/linsolve/psb_zbicg.f90
+++ b/linsolve/psb_zbicg.f90
@@ -99,7 +99,7 @@ subroutine psb_zbicg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
type(psb_desc_type), intent(in) :: desc_a
diff --git a/linsolve/psb_zcg.F90 b/linsolve/psb_zcg.F90
index 5d6adb4d..6ca5909c 100644
--- a/linsolve/psb_zcg.F90
+++ b/linsolve/psb_zcg.F90
@@ -100,7 +100,7 @@ subroutine psb_zcg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_zcgs.f90 b/linsolve/psb_zcgs.f90
index fe307bc6..c5140545 100644
--- a/linsolve/psb_zcgs.f90
+++ b/linsolve/psb_zcgs.f90
@@ -97,7 +97,7 @@ Subroutine psb_zcgs_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_zcgstab.f90 b/linsolve/psb_zcgstab.f90
index 6cc72a28..e04cd7c4 100644
--- a/linsolve/psb_zcgstab.f90
+++ b/linsolve/psb_zcgstab.f90
@@ -97,7 +97,7 @@ Subroutine psb_zcgstab_vect(a,prec,b,x,eps,desc_a,info,itmax,iter,err,itrace,ist
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
class(psb_zprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_zcgstabl.f90 b/linsolve/psb_zcgstabl.f90
index 0898953a..93abd056 100644
--- a/linsolve/psb_zcgstabl.f90
+++ b/linsolve/psb_zcgstabl.f90
@@ -108,7 +108,7 @@ Subroutine psb_zcgstabl_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
class(psb_zprec_type), Intent(inout) :: prec
diff --git a/linsolve/psb_zfcg.F90 b/linsolve/psb_zfcg.F90
index e5d6f23c..1ab036fe 100644
--- a/linsolve/psb_zfcg.F90
+++ b/linsolve/psb_zfcg.F90
@@ -108,7 +108,7 @@ subroutine psb_zfcg_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_zgcr.f90 b/linsolve/psb_zgcr.f90
index 30eb7db2..e48fc4c1 100644
--- a/linsolve/psb_zgcr.f90
+++ b/linsolve/psb_zgcr.f90
@@ -110,7 +110,7 @@ subroutine psb_zgcr_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
diff --git a/linsolve/psb_zkrylov.f90 b/linsolve/psb_zkrylov.f90
index bcfd6806..900e50b6 100644
--- a/linsolve/psb_zkrylov.f90
+++ b/linsolve/psb_zkrylov.f90
@@ -84,7 +84,7 @@ Subroutine psb_zkrylov_vect(method,a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod,only : psb_zprec_type
- use psb_krylov_mod, psb_protect_name => psb_zkrylov_vect
+ use psb_linsolve_mod, psb_protect_name => psb_zkrylov_vect
character(len=*) :: method
Type(psb_zspmat_type), Intent(in) :: a
diff --git a/linsolve/psb_zrgmres.f90 b/linsolve/psb_zrgmres.f90
index 2ee8e4dc..39985518 100644
--- a/linsolve/psb_zrgmres.f90
+++ b/linsolve/psb_zrgmres.f90
@@ -111,7 +111,7 @@ subroutine psb_zrgmres_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
implicit none
type(psb_zspmat_type), intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/linsolve/psb_zrichardson.f90 b/linsolve/psb_zrichardson.f90
index b4c0af0f..4c2bc567 100644
--- a/linsolve/psb_zrichardson.f90
+++ b/linsolve/psb_zrichardson.f90
@@ -74,7 +74,7 @@ Subroutine psb_zrichardson_vect(a,prec,b,x,eps,desc_a,info,&
use psb_base_mod
use psb_prec_mod
use psb_z_linsolve_conv_mod
- use psb_krylov_mod, psb_protect_name => psb_zrichardson_vect
+ use psb_linsolve_mod, psb_protect_name => psb_zrichardson_vect
Type(psb_zspmat_type), Intent(in) :: a
Type(psb_desc_type), Intent(in) :: desc_a
diff --git a/test/fileread/psb_cf_sample.f90 b/test/fileread/psb_cf_sample.f90
index 3c0ce8f9..01d9a10b 100644
--- a/test/fileread/psb_cf_sample.f90
+++ b/test/fileread/psb_cf_sample.f90
@@ -32,7 +32,7 @@
program psb_cf_sample
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use getp
implicit none
diff --git a/test/fileread/psb_df_sample.f90 b/test/fileread/psb_df_sample.f90
index 25a121a4..b186241d 100644
--- a/test/fileread/psb_df_sample.f90
+++ b/test/fileread/psb_df_sample.f90
@@ -32,7 +32,7 @@
program psb_df_sample
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use getp
implicit none
diff --git a/test/fileread/psb_sf_sample.f90 b/test/fileread/psb_sf_sample.f90
index 8d9ccb0a..da0fe6b4 100644
--- a/test/fileread/psb_sf_sample.f90
+++ b/test/fileread/psb_sf_sample.f90
@@ -32,7 +32,7 @@
program psb_sf_sample
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use getp
implicit none
diff --git a/test/fileread/psb_zf_sample.f90 b/test/fileread/psb_zf_sample.f90
index 3c024606..eefe0901 100644
--- a/test/fileread/psb_zf_sample.f90
+++ b/test/fileread/psb_zf_sample.f90
@@ -32,7 +32,7 @@
program psb_zf_sample
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use getp
implicit none
diff --git a/test/pargen/psb_d_pde2d.F90 b/test/pargen/psb_d_pde2d.F90
index 11777b19..7b99fb05 100644
--- a/test/pargen/psb_d_pde2d.F90
+++ b/test/pargen/psb_d_pde2d.F90
@@ -595,7 +595,7 @@ end module psb_d_pde2d_mod
program psb_d_pde2d
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use psb_d_pde2d_mod
#if defined(OPENMP)
diff --git a/test/pargen/psb_d_pde3d.F90 b/test/pargen/psb_d_pde3d.F90
index 6e895c00..fca19c80 100644
--- a/test/pargen/psb_d_pde3d.F90
+++ b/test/pargen/psb_d_pde3d.F90
@@ -653,7 +653,7 @@ end module psb_d_pde3d_mod
program psb_d_pde3d
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use psb_d_pde3d_mod
#if defined(OPENMP)
diff --git a/test/pargen/psb_s_pde2d.F90 b/test/pargen/psb_s_pde2d.F90
index f14d2cb4..9bfcc271 100644
--- a/test/pargen/psb_s_pde2d.F90
+++ b/test/pargen/psb_s_pde2d.F90
@@ -595,7 +595,7 @@ end module psb_s_pde2d_mod
program psb_s_pde2d
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use psb_s_pde2d_mod
#if defined(OPENMP)
diff --git a/test/pargen/psb_s_pde3d.F90 b/test/pargen/psb_s_pde3d.F90
index 2938a4ff..b62073d2 100644
--- a/test/pargen/psb_s_pde3d.F90
+++ b/test/pargen/psb_s_pde3d.F90
@@ -653,7 +653,7 @@ end module psb_s_pde3d_mod
program psb_s_pde3d
use psb_base_mod
use psb_prec_mod
- use psb_krylov_mod
+ use psb_linsolve_mod
use psb_util_mod
use psb_s_pde3d_mod
#if defined(OPENMP)
From e9aa9a52371d6c77ffc567e74581e58b0a79485f Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Sun, 10 Nov 2024 10:59:28 +0100
Subject: [PATCH 63/86] Restructuring linsolve.
---
linsolve/Makefile | 25 ++++++++++-----------
linsolve/impl/Makefile | 30 +++++++++++++++++++++++++
linsolve/{ => impl}/psb_cbicg.f90 | 0
linsolve/{ => impl}/psb_ccg.F90 | 0
linsolve/{ => impl}/psb_ccgs.f90 | 0
linsolve/{ => impl}/psb_ccgstab.f90 | 0
linsolve/{ => impl}/psb_ccgstabl.f90 | 0
linsolve/{ => impl}/psb_cfcg.F90 | 0
linsolve/{ => impl}/psb_cgcr.f90 | 0
linsolve/{ => impl}/psb_ckrylov.f90 | 0
linsolve/{ => impl}/psb_crgmres.f90 | 0
linsolve/{ => impl}/psb_crichardson.f90 | 0
linsolve/{ => impl}/psb_dbicg.f90 | 0
linsolve/{ => impl}/psb_dcg.F90 | 0
linsolve/{ => impl}/psb_dcgs.f90 | 0
linsolve/{ => impl}/psb_dcgstab.f90 | 0
linsolve/{ => impl}/psb_dcgstabl.f90 | 0
linsolve/{ => impl}/psb_dfcg.F90 | 0
linsolve/{ => impl}/psb_dgcr.f90 | 0
linsolve/{ => impl}/psb_dkrylov.f90 | 0
linsolve/{ => impl}/psb_drgmres.f90 | 0
linsolve/{ => impl}/psb_drichardson.f90 | 0
linsolve/{ => impl}/psb_sbicg.f90 | 0
linsolve/{ => impl}/psb_scg.F90 | 0
linsolve/{ => impl}/psb_scgs.f90 | 0
linsolve/{ => impl}/psb_scgstab.f90 | 0
linsolve/{ => impl}/psb_scgstabl.f90 | 0
linsolve/{ => impl}/psb_sfcg.F90 | 0
linsolve/{ => impl}/psb_sgcr.f90 | 0
linsolve/{ => impl}/psb_skrylov.f90 | 0
linsolve/{ => impl}/psb_srgmres.f90 | 0
linsolve/{ => impl}/psb_srichardson.f90 | 0
linsolve/{ => impl}/psb_zbicg.f90 | 0
linsolve/{ => impl}/psb_zcg.F90 | 0
linsolve/{ => impl}/psb_zcgs.f90 | 0
linsolve/{ => impl}/psb_zcgstab.f90 | 0
linsolve/{ => impl}/psb_zcgstabl.f90 | 0
linsolve/{ => impl}/psb_zfcg.F90 | 0
linsolve/{ => impl}/psb_zgcr.f90 | 0
linsolve/{ => impl}/psb_zkrylov.f90 | 0
linsolve/{ => impl}/psb_zrgmres.f90 | 0
linsolve/{ => impl}/psb_zrichardson.f90 | 0
42 files changed, 42 insertions(+), 13 deletions(-)
create mode 100644 linsolve/impl/Makefile
rename linsolve/{ => impl}/psb_cbicg.f90 (100%)
rename linsolve/{ => impl}/psb_ccg.F90 (100%)
rename linsolve/{ => impl}/psb_ccgs.f90 (100%)
rename linsolve/{ => impl}/psb_ccgstab.f90 (100%)
rename linsolve/{ => impl}/psb_ccgstabl.f90 (100%)
rename linsolve/{ => impl}/psb_cfcg.F90 (100%)
rename linsolve/{ => impl}/psb_cgcr.f90 (100%)
rename linsolve/{ => impl}/psb_ckrylov.f90 (100%)
rename linsolve/{ => impl}/psb_crgmres.f90 (100%)
rename linsolve/{ => impl}/psb_crichardson.f90 (100%)
rename linsolve/{ => impl}/psb_dbicg.f90 (100%)
rename linsolve/{ => impl}/psb_dcg.F90 (100%)
rename linsolve/{ => impl}/psb_dcgs.f90 (100%)
rename linsolve/{ => impl}/psb_dcgstab.f90 (100%)
rename linsolve/{ => impl}/psb_dcgstabl.f90 (100%)
rename linsolve/{ => impl}/psb_dfcg.F90 (100%)
rename linsolve/{ => impl}/psb_dgcr.f90 (100%)
rename linsolve/{ => impl}/psb_dkrylov.f90 (100%)
rename linsolve/{ => impl}/psb_drgmres.f90 (100%)
rename linsolve/{ => impl}/psb_drichardson.f90 (100%)
rename linsolve/{ => impl}/psb_sbicg.f90 (100%)
rename linsolve/{ => impl}/psb_scg.F90 (100%)
rename linsolve/{ => impl}/psb_scgs.f90 (100%)
rename linsolve/{ => impl}/psb_scgstab.f90 (100%)
rename linsolve/{ => impl}/psb_scgstabl.f90 (100%)
rename linsolve/{ => impl}/psb_sfcg.F90 (100%)
rename linsolve/{ => impl}/psb_sgcr.f90 (100%)
rename linsolve/{ => impl}/psb_skrylov.f90 (100%)
rename linsolve/{ => impl}/psb_srgmres.f90 (100%)
rename linsolve/{ => impl}/psb_srichardson.f90 (100%)
rename linsolve/{ => impl}/psb_zbicg.f90 (100%)
rename linsolve/{ => impl}/psb_zcg.F90 (100%)
rename linsolve/{ => impl}/psb_zcgs.f90 (100%)
rename linsolve/{ => impl}/psb_zcgstab.f90 (100%)
rename linsolve/{ => impl}/psb_zcgstabl.f90 (100%)
rename linsolve/{ => impl}/psb_zfcg.F90 (100%)
rename linsolve/{ => impl}/psb_zgcr.f90 (100%)
rename linsolve/{ => impl}/psb_zkrylov.f90 (100%)
rename linsolve/{ => impl}/psb_zrgmres.f90 (100%)
rename linsolve/{ => impl}/psb_zrichardson.f90 (100%)
diff --git a/linsolve/Makefile b/linsolve/Makefile
index 5c6445f5..c8fa3ac7 100644
--- a/linsolve/Makefile
+++ b/linsolve/Makefile
@@ -10,32 +10,29 @@ MODOBJS= psb_base_linsolve_conv_mod.o \
psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o \
psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o \
psb_linsolve_mod.o
-F90OBJS=psb_dkrylov.o psb_skrylov.o psb_ckrylov.o psb_zkrylov.o \
- psb_drichardson.o psb_srichardson.o psb_crichardson.o psb_zrichardson.o \
- psb_dcgstab.o psb_dcg.o psb_dfcg.o psb_dgcr.o psb_dcgs.o \
- psb_dbicg.o psb_dcgstabl.o psb_drgmres.o\
- psb_scgstab.o psb_scg.o psb_sfcg.o psb_sgcr.o psb_scgs.o \
- psb_sbicg.o psb_scgstabl.o psb_srgmres.o\
- psb_ccgstab.o psb_ccg.o psb_cfcg.o psb_cgcr.o psb_ccgs.o \
- psb_cbicg.o psb_ccgstabl.o psb_crgmres.o\
- psb_zcgstab.o psb_zcg.o psb_zfcg.o psb_zgcr.o psb_zcgs.o \
- psb_zbicg.o psb_zcgstabl.o psb_zrgmres.o
-OBJS=$(F90OBJS) $(MODOBJS)
+
+OBJS=$(MODOBJS)
LOCAL_MODS=$(MODOBJS:.o=$(.mod))
LIBNAME=$(METHDLIBNAME)
FINCLUDES=$(FMFLAG). $(FMFLAG)$(MODDIR)
-objs: $(OBJS)
+objs: $(OBJS) impld
/bin/cp -p $(CPUPDFLAG) *$(.mod) $(MODDIR)
lib: objs
+ $(MAKE) -C impl lib
$(AR) $(HERE)/$(LIBNAME) $(OBJS)
$(RANLIB) $(HERE)/$(LIBNAME)
/bin/cp -p $(CPUPDFLAG) $(HERE)/$(LIBNAME) $(LIBDIR)
+$(OBJS): $(MODDIR)/$(BASEMODNAME)$(.mod)
+
+impld: $(OBJS)
+ $(MAKE) -C impl objs
+
psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o: psb_base_linsolve_conv_mod.o
psb_linsolve_conv_mod.o: psb_s_linsolve_conv_mod.o psb_c_linsolve_conv_mod.o psb_d_linsolve_conv_mod.o psb_z_linsolve_conv_mod.o
$(F90OBJS): $(MODOBJS)
@@ -44,6 +41,8 @@ $(OBJS): $(MODDIR)/$(PRECMODNAME)$(.mod) $(MODDIR)/$(BASEMODNAME)$(.mod)
veryclean: clean
/bin/rm -f $(HERE)/$(LIBNAME)
-clean:
+iclean:
+ cd impl && $(MAKE) clean
+clean: iclean
/bin/rm -f $(OBJS) *$(.mod)
diff --git a/linsolve/impl/Makefile b/linsolve/impl/Makefile
new file mode 100644
index 00000000..62444af7
--- /dev/null
+++ b/linsolve/impl/Makefile
@@ -0,0 +1,30 @@
+include ../../Make.inc
+
+LIBDIR=../../lib
+INCDIR=../../include
+MODDIR=../../modules
+HERE=..
+OBJS=psb_dkrylov.o psb_skrylov.o psb_ckrylov.o psb_zkrylov.o \
+ psb_drichardson.o psb_srichardson.o psb_crichardson.o psb_zrichardson.o \
+ psb_dcgstab.o psb_dcg.o psb_dfcg.o psb_dgcr.o psb_dcgs.o \
+ psb_dbicg.o psb_dcgstabl.o psb_drgmres.o\
+ psb_scgstab.o psb_scg.o psb_sfcg.o psb_sgcr.o psb_scgs.o \
+ psb_sbicg.o psb_scgstabl.o psb_srgmres.o\
+ psb_ccgstab.o psb_ccg.o psb_cfcg.o psb_cgcr.o psb_ccgs.o \
+ psb_cbicg.o psb_ccgstabl.o psb_crgmres.o\
+ psb_zcgstab.o psb_zcg.o psb_zfcg.o psb_zgcr.o psb_zcgs.o \
+ psb_zbicg.o psb_zcgstabl.o psb_zrgmres.o
+
+LIBNAME=$(METHDLIBNAME)
+COBJS=
+FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR)
+
+objs: $(OBJS)
+lib: objs
+ $(AR) $(HERE)/$(LIBNAME) $(OBJS)
+ $(RANLIB) $(HERE)/$(LIBNAME)
+
+veryclean: clean
+
+clean:
+ /bin/rm -f $(OBJS) $(LOCAL_MODS)
diff --git a/linsolve/psb_cbicg.f90 b/linsolve/impl/psb_cbicg.f90
similarity index 100%
rename from linsolve/psb_cbicg.f90
rename to linsolve/impl/psb_cbicg.f90
diff --git a/linsolve/psb_ccg.F90 b/linsolve/impl/psb_ccg.F90
similarity index 100%
rename from linsolve/psb_ccg.F90
rename to linsolve/impl/psb_ccg.F90
diff --git a/linsolve/psb_ccgs.f90 b/linsolve/impl/psb_ccgs.f90
similarity index 100%
rename from linsolve/psb_ccgs.f90
rename to linsolve/impl/psb_ccgs.f90
diff --git a/linsolve/psb_ccgstab.f90 b/linsolve/impl/psb_ccgstab.f90
similarity index 100%
rename from linsolve/psb_ccgstab.f90
rename to linsolve/impl/psb_ccgstab.f90
diff --git a/linsolve/psb_ccgstabl.f90 b/linsolve/impl/psb_ccgstabl.f90
similarity index 100%
rename from linsolve/psb_ccgstabl.f90
rename to linsolve/impl/psb_ccgstabl.f90
diff --git a/linsolve/psb_cfcg.F90 b/linsolve/impl/psb_cfcg.F90
similarity index 100%
rename from linsolve/psb_cfcg.F90
rename to linsolve/impl/psb_cfcg.F90
diff --git a/linsolve/psb_cgcr.f90 b/linsolve/impl/psb_cgcr.f90
similarity index 100%
rename from linsolve/psb_cgcr.f90
rename to linsolve/impl/psb_cgcr.f90
diff --git a/linsolve/psb_ckrylov.f90 b/linsolve/impl/psb_ckrylov.f90
similarity index 100%
rename from linsolve/psb_ckrylov.f90
rename to linsolve/impl/psb_ckrylov.f90
diff --git a/linsolve/psb_crgmres.f90 b/linsolve/impl/psb_crgmres.f90
similarity index 100%
rename from linsolve/psb_crgmres.f90
rename to linsolve/impl/psb_crgmres.f90
diff --git a/linsolve/psb_crichardson.f90 b/linsolve/impl/psb_crichardson.f90
similarity index 100%
rename from linsolve/psb_crichardson.f90
rename to linsolve/impl/psb_crichardson.f90
diff --git a/linsolve/psb_dbicg.f90 b/linsolve/impl/psb_dbicg.f90
similarity index 100%
rename from linsolve/psb_dbicg.f90
rename to linsolve/impl/psb_dbicg.f90
diff --git a/linsolve/psb_dcg.F90 b/linsolve/impl/psb_dcg.F90
similarity index 100%
rename from linsolve/psb_dcg.F90
rename to linsolve/impl/psb_dcg.F90
diff --git a/linsolve/psb_dcgs.f90 b/linsolve/impl/psb_dcgs.f90
similarity index 100%
rename from linsolve/psb_dcgs.f90
rename to linsolve/impl/psb_dcgs.f90
diff --git a/linsolve/psb_dcgstab.f90 b/linsolve/impl/psb_dcgstab.f90
similarity index 100%
rename from linsolve/psb_dcgstab.f90
rename to linsolve/impl/psb_dcgstab.f90
diff --git a/linsolve/psb_dcgstabl.f90 b/linsolve/impl/psb_dcgstabl.f90
similarity index 100%
rename from linsolve/psb_dcgstabl.f90
rename to linsolve/impl/psb_dcgstabl.f90
diff --git a/linsolve/psb_dfcg.F90 b/linsolve/impl/psb_dfcg.F90
similarity index 100%
rename from linsolve/psb_dfcg.F90
rename to linsolve/impl/psb_dfcg.F90
diff --git a/linsolve/psb_dgcr.f90 b/linsolve/impl/psb_dgcr.f90
similarity index 100%
rename from linsolve/psb_dgcr.f90
rename to linsolve/impl/psb_dgcr.f90
diff --git a/linsolve/psb_dkrylov.f90 b/linsolve/impl/psb_dkrylov.f90
similarity index 100%
rename from linsolve/psb_dkrylov.f90
rename to linsolve/impl/psb_dkrylov.f90
diff --git a/linsolve/psb_drgmres.f90 b/linsolve/impl/psb_drgmres.f90
similarity index 100%
rename from linsolve/psb_drgmres.f90
rename to linsolve/impl/psb_drgmres.f90
diff --git a/linsolve/psb_drichardson.f90 b/linsolve/impl/psb_drichardson.f90
similarity index 100%
rename from linsolve/psb_drichardson.f90
rename to linsolve/impl/psb_drichardson.f90
diff --git a/linsolve/psb_sbicg.f90 b/linsolve/impl/psb_sbicg.f90
similarity index 100%
rename from linsolve/psb_sbicg.f90
rename to linsolve/impl/psb_sbicg.f90
diff --git a/linsolve/psb_scg.F90 b/linsolve/impl/psb_scg.F90
similarity index 100%
rename from linsolve/psb_scg.F90
rename to linsolve/impl/psb_scg.F90
diff --git a/linsolve/psb_scgs.f90 b/linsolve/impl/psb_scgs.f90
similarity index 100%
rename from linsolve/psb_scgs.f90
rename to linsolve/impl/psb_scgs.f90
diff --git a/linsolve/psb_scgstab.f90 b/linsolve/impl/psb_scgstab.f90
similarity index 100%
rename from linsolve/psb_scgstab.f90
rename to linsolve/impl/psb_scgstab.f90
diff --git a/linsolve/psb_scgstabl.f90 b/linsolve/impl/psb_scgstabl.f90
similarity index 100%
rename from linsolve/psb_scgstabl.f90
rename to linsolve/impl/psb_scgstabl.f90
diff --git a/linsolve/psb_sfcg.F90 b/linsolve/impl/psb_sfcg.F90
similarity index 100%
rename from linsolve/psb_sfcg.F90
rename to linsolve/impl/psb_sfcg.F90
diff --git a/linsolve/psb_sgcr.f90 b/linsolve/impl/psb_sgcr.f90
similarity index 100%
rename from linsolve/psb_sgcr.f90
rename to linsolve/impl/psb_sgcr.f90
diff --git a/linsolve/psb_skrylov.f90 b/linsolve/impl/psb_skrylov.f90
similarity index 100%
rename from linsolve/psb_skrylov.f90
rename to linsolve/impl/psb_skrylov.f90
diff --git a/linsolve/psb_srgmres.f90 b/linsolve/impl/psb_srgmres.f90
similarity index 100%
rename from linsolve/psb_srgmres.f90
rename to linsolve/impl/psb_srgmres.f90
diff --git a/linsolve/psb_srichardson.f90 b/linsolve/impl/psb_srichardson.f90
similarity index 100%
rename from linsolve/psb_srichardson.f90
rename to linsolve/impl/psb_srichardson.f90
diff --git a/linsolve/psb_zbicg.f90 b/linsolve/impl/psb_zbicg.f90
similarity index 100%
rename from linsolve/psb_zbicg.f90
rename to linsolve/impl/psb_zbicg.f90
diff --git a/linsolve/psb_zcg.F90 b/linsolve/impl/psb_zcg.F90
similarity index 100%
rename from linsolve/psb_zcg.F90
rename to linsolve/impl/psb_zcg.F90
diff --git a/linsolve/psb_zcgs.f90 b/linsolve/impl/psb_zcgs.f90
similarity index 100%
rename from linsolve/psb_zcgs.f90
rename to linsolve/impl/psb_zcgs.f90
diff --git a/linsolve/psb_zcgstab.f90 b/linsolve/impl/psb_zcgstab.f90
similarity index 100%
rename from linsolve/psb_zcgstab.f90
rename to linsolve/impl/psb_zcgstab.f90
diff --git a/linsolve/psb_zcgstabl.f90 b/linsolve/impl/psb_zcgstabl.f90
similarity index 100%
rename from linsolve/psb_zcgstabl.f90
rename to linsolve/impl/psb_zcgstabl.f90
diff --git a/linsolve/psb_zfcg.F90 b/linsolve/impl/psb_zfcg.F90
similarity index 100%
rename from linsolve/psb_zfcg.F90
rename to linsolve/impl/psb_zfcg.F90
diff --git a/linsolve/psb_zgcr.f90 b/linsolve/impl/psb_zgcr.f90
similarity index 100%
rename from linsolve/psb_zgcr.f90
rename to linsolve/impl/psb_zgcr.f90
diff --git a/linsolve/psb_zkrylov.f90 b/linsolve/impl/psb_zkrylov.f90
similarity index 100%
rename from linsolve/psb_zkrylov.f90
rename to linsolve/impl/psb_zkrylov.f90
diff --git a/linsolve/psb_zrgmres.f90 b/linsolve/impl/psb_zrgmres.f90
similarity index 100%
rename from linsolve/psb_zrgmres.f90
rename to linsolve/impl/psb_zrgmres.f90
diff --git a/linsolve/psb_zrichardson.f90 b/linsolve/impl/psb_zrichardson.f90
similarity index 100%
rename from linsolve/psb_zrichardson.f90
rename to linsolve/impl/psb_zrichardson.f90
From 4f4006cf6b92cd702bc7ed10ed87c61ef8c8c8a0 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Sun, 10 Nov 2024 17:30:51 +0100
Subject: [PATCH 64/86] Configure fixes
---
configure | 6 +++---
configure.ac | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/configure b/configure
index de173d6e..4645c593 100755
--- a/configure
+++ b/configure
@@ -7423,7 +7423,7 @@ fi
##############################################################################
BASEMODNAME=psb_base_mod
PRECMODNAME=psb_prec_mod
-METHDMODNAME=psb_krylov_mod
+METHDMODNAME=psb_linsolve_mod
UTILMODNAME=psb_util_mod
if test "X$psblas_cv_fc" == X"cray"
@@ -7433,7 +7433,7 @@ then
FIFLAG="-I"
BASEMODNAME=PSB_BASE_MOD
PRECMODNAME=PSB_PREC_MOD
- METHDMODNAME=PSB_KRYLOV_MOD
+ METHDMODNAME=PSB_LINSOLVE_MOD
UTILMODNAME=PSB_UTIL_MOD
else
@@ -11332,7 +11332,7 @@ fi
LIBDIR=lib
BASELIBNAME=libpsb_base.a
PRECLIBNAME=libpsb_prec.a
-METHDLIBNAME=libpsb_krylov.a
+METHDLIBNAME=libpsb_linsolve.a
UTILLIBNAME=libpsb_util.a
###############################################################################
diff --git a/configure.ac b/configure.ac
index c670f6c8..b2d851ae 100755
--- a/configure.ac
+++ b/configure.ac
@@ -483,7 +483,7 @@ fi
##############################################################################
BASEMODNAME=psb_base_mod
PRECMODNAME=psb_prec_mod
-METHDMODNAME=psb_krylov_mod
+METHDMODNAME=psb_linsolve_mod
UTILMODNAME=psb_util_mod
if test "X$psblas_cv_fc" == X"cray"
@@ -493,7 +493,7 @@ then
FIFLAG="-I"
BASEMODNAME=PSB_BASE_MOD
PRECMODNAME=PSB_PREC_MOD
- METHDMODNAME=PSB_KRYLOV_MOD
+ METHDMODNAME=PSB_LINSOLVE_MOD
UTILMODNAME=PSB_UTIL_MOD
else
@@ -926,7 +926,7 @@ fi
LIBDIR=lib
BASELIBNAME=libpsb_base.a
PRECLIBNAME=libpsb_prec.a
-METHDLIBNAME=libpsb_krylov.a
+METHDLIBNAME=libpsb_linsolve.a
UTILLIBNAME=libpsb_util.a
###############################################################################
From a02440afffee744ed1bff20fd76838820f126d29 Mon Sep 17 00:00:00 2001
From: sfilippone
Date: Mon, 11 Nov 2024 17:48:49 +0100
Subject: [PATCH 65/86] Updatex docs for linsolve
---
cbind/test/pargen/Makefile | 2 +-
docs/html/index.html | 6 +-
docs/html/userhtml.html | 6 +-
docs/html/userhtmlli1.html | 42 +-
docs/html/userhtmlli2.html | 2 +-
docs/html/userhtmlse11.html | 345 +-
docs/html/userhtmlse12.html | 84 +-
docs/html/userhtmlse13.html | 60 +-
docs/psblas-3.9.pdf | 13501 +++++++++++++++++-----------------
docs/src/methods.tex | 117 +-
10 files changed, 7483 insertions(+), 6682 deletions(-)
diff --git a/cbind/test/pargen/Makefile b/cbind/test/pargen/Makefile
index 2e74497f..e1da6892 100644
--- a/cbind/test/pargen/Makefile
+++ b/cbind/test/pargen/Makefile
@@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR)
CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR)
PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind
-PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR)
+PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR)
#
# Compilers and such
diff --git a/docs/html/index.html b/docs/html/index.html
index 083bd90a..2ccf61d9 100644
--- a/docs/html/index.html
+++ b/docs/html/index.html
@@ -54,11 +54,11 @@ href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines 11 Iterative Methods
12 Extensions
+href="userhtmlse12.html#x19-14500012" id="QQ2-19-175">Extensions
13 CUDA Environment Routines
+href="userhtmlse13.html#x20-15400013" id="QQ2-20-190">CUDA Environment Routines
References
+href="userhtmlli2.html#x21-169000" id="QQ2-21-219">References
diff --git a/docs/html/userhtml.html b/docs/html/userhtml.html
index 083bd90a..2ccf61d9 100644
--- a/docs/html/userhtml.html
+++ b/docs/html/userhtml.html
@@ -54,11 +54,11 @@ href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines 11 Iterative Methods
12 Extensions
+href="userhtmlse12.html#x19-14500012" id="QQ2-19-175">Extensions
13 CUDA Environment Routines
+href="userhtmlse13.html#x20-15400013" id="QQ2-20-190">CUDA Environment Routines
References
+href="userhtmlli2.html#x21-169000" id="QQ2-21-219">References
diff --git a/docs/html/userhtmlli1.html b/docs/html/userhtmlli1.html
index e767ddcf..40ffe257 100644
--- a/docs/html/userhtmlli1.html
+++ b/docs/html/userhtmlli1.html
@@ -310,46 +310,48 @@ href="userhtmlse10.html#x15-14100010.6" id="QQ2-15-171">free — Free a prec
href="userhtmlse11.html#x17-14200011">Iterative Methods
11.1 psb_krylov — Krylov Methods Driver Routine
+
11.2 psb_richardson — Richardson Iteration Driver Routine
12 Extensions
+href="userhtmlse12.html#x19-14500012">Extensions
12.1 Using the extensions
+href="userhtmlse12.html#x19-14600012.1" id="QQ2-19-176">Using the extensions
12.2 Extensions’ Data Structures
+href="userhtmlse12.html#x19-14700012.2" id="QQ2-19-177">Extensions’ Data Structures
12.3 CPU-class extensions
+href="userhtmlse12.html#x19-14800012.3" id="QQ2-19-180">CPU-class extensions
12.4 CUDA-class extensions
+href="userhtmlse12.html#x19-15300012.4" id="QQ2-19-189">CUDA-class extensions
13 CUDA Environment Routines
+href="userhtmlse13.html#x20-15400013">CUDA Environment Routines
psb_cuda_init
+href="userhtmlse13.html#Q1-20-192">psb_cuda_init
psb_cuda_exit
+href="userhtmlse13.html#Q1-20-194">psb_cuda_exit
psb_cuda_DeviceSync
+href="userhtmlse13.html#Q1-20-196">psb_cuda_DeviceSync
psb_cuda_getDeviceCount
+href="userhtmlse13.html#Q1-20-198">psb_cuda_getDeviceCount
psb_cuda_getDevice
+href="userhtmlse13.html#Q1-20-200">psb_cuda_getDevice
psb_cuda_setDevice
+href="userhtmlse13.html#Q1-20-202">psb_cuda_setDevice
psb_cuda_DeviceHasUVA
+href="userhtmlse13.html#Q1-20-204">psb_cuda_DeviceHasUVA
psb_cuda_WarpSize
+href="userhtmlse13.html#Q1-20-206">psb_cuda_WarpSize
psb_cuda_MultiProcessors
+href="userhtmlse13.html#Q1-20-208">psb_cuda_MultiProcessors
psb_cuda_MaxThreadsPerMP
+href="userhtmlse13.html#Q1-20-210">psb_cuda_MaxThreadsPerMP
psb_cuda_MaxRegisterPerBlock
+href="userhtmlse13.html#Q1-20-212">psb_cuda_MaxRegisterPerBlock
psb_cuda_MemoryClockRate
+href="userhtmlse13.html#Q1-20-214">psb_cuda_MemoryClockRate
psb_cuda_MemoryBusWidth
+href="userhtmlse13.html#Q1-20-216">psb_cuda_MemoryBusWidth
psb_cuda_MemoryPeakBandwidth
+href="userhtmlse13.html#Q1-20-218">psb_cuda_MemoryPeakBandwidth
diff --git a/docs/html/userhtmlli2.html b/docs/html/userhtmlli2.html
index e486077a..f31a33cf 100644
--- a/docs/html/userhtmlli2.html
+++ b/docs/html/userhtmlli2.html
@@ -16,7 +16,7 @@ href="userhtmlse13.html#tailuserhtmlse13.html" >prev-tail] [tail] [up]
References
+ id="x21-169000">References
diff --git a/docs/html/userhtmlse11.html b/docs/html/userhtmlse11.html
index 41a4cd65..c99d4e40 100644
--- a/docs/html/userhtmlse11.html
+++ b/docs/html/userhtmlse11.html
@@ -17,10 +17,9 @@ href="userhtmlse8.html#tailuserhtmlse11.html">tail] [up]
11 Iterative Methods
-In this chapter we provide routines for preconditioners and iterative methods.
-The interfaces for Krylov subspace methods are available in the module
-psb_krylov_mod.
+
In this chapter we provide routines for preconditioners and iterative methods. The
+interfaces for iterative methods are available in the module psb_linsolve_mod.
@@ -456,6 +455,344 @@ class="newline" />An integer value; 0 means no error has been detected.11.2 psb_richardson — Richardson Iteration Driver Routine
+
This subroutine is a driver implementig a Richardson iteration
+
+
![x = M - 1(b - Ax )+ x ,
+ k+1 k k
+]()
+ with the preconditioner operator M defined in the previous section.
+
The stopping criterion can take the following values:
+
-
+
+1
-
+
normwise backward error in the infinity norm; the iteration is stopped
+ when
+
+
![-----∥ri∥------
+err = (∥A∥∥xi∥+ ∥b∥) < eps
+]()
+
+
-
+
+2
-
+
Relative residual in the 2-norm; the iteration is stopped when
+
+
![∥ri∥-
+err = ∥b∥2 < eps
+]()
+
+
-
+
+3
-
+
Relative residual reduction in the 2-norm; the iteration is stopped when
+
+
![-∥ri∥-
+err = ∥r0∥2 < eps
+]()
+
+
+
+
+The behaviour is controlled by the istop argument (see later). In the above formulae, xi
+is the tentative solution and ri = b - Axi the corresponding residual at the i-th
+iteration.
+
+
call psb_richardson(a,prec,b,x,eps,desc_a,info,&
+ & itmax,iter,err,itrace,istop)
+
+
+
-
+
+Type:
-
+
Synchronous.
+
-
+
+On Entry
-
+
+
-
+
+a
-
+
the local portion of global sparse matrix A.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_Tspmat_type.
+
-
+
+prec
-
+
The data structure containing the preconditioner.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_prec_type.
+
-
+
+b
-
+
The RHS vector.
Scope: local
Type: required
Intent: in.
Specified as: a rank one array or an object of type psb_T_vect_type.
+
-
+
+x
-
+
+
+
+
The initial guess.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one array or an object of type psb_T_vect_type.
+
-
+
+eps
-
+
The stopping tolerance.
Scope: global
Type: required
Intent: in.
Specified as: a real number.
+
-
+
+desc_a
-
+
contains data structures for communications.
Scope: local
Type: required
Intent: in.
Specified as: a structured data of type psb_desc_type.
+
-
+
+itmax
-
+
The maximum number of iterations to perform.
Scope: global
Type: optional
Intent: in.
Default: itmax = 1000.
Specified as: an integer variable itmax ≥ 1.
+
-
+
+itrace
-
+
If > 0 print out an informational message about convergence every itrace
+ iterations. If = 0 print a message in case of convergence failure.
Scope: global
Type: optional
Intent: in.
Default: itrace = -1.
+
-
+
+istop
-
+
+
+
+
An integer specifying the stopping criterion.
Scope: global
Type: optional.
Intent: in.
Values: 1: use the normwise backward error, 2: use the scaled 2-norm of
+ the residual, 3: use the residual reduction in the 2-norm. Default: 2.
+
-
+
+On Return
-
+
+
-
+
+x
-
+
The computed solution.
Scope: local
Type: required
Intent: inout.
Specified as: a rank one array or an object of type psb_T_vect_type.
+
-
+
+iter
-
+
The number of iterations performed.
Scope: global
Type: optional
Intent: out.
Returned as: an integer variable.
+
-
+
+err
-
+
The convergence estimate on exit.
Scope: global
Type: optional
Intent: out.
Returned as: a real number.
+
-
+
+info
-
+
Error code.
Scope: local
Type: required
Intent: out.
An integer value; 0 means no error has been detected.
+
+
+
diff --git a/docs/html/userhtmlse12.html b/docs/html/userhtmlse12.html
index e47c099d..180aa0dd 100644
--- a/docs/html/userhtmlse12.html
+++ b/docs/html/userhtmlse12.html
@@ -16,7 +16,7 @@ href="userhtmlse11.html#tailuserhtmlse11.html" >prev-tail] [tail] [up]
12 Extensions
+ id="x19-14500012">Extensions
The EXT, CUDA and RSB subdirectories contains a set of extensions to the base
library. The extensions provide additional storage formats beyond the ones already
contained in the base library, as well as interfaces to:
@@ -49,7 +49,7 @@ in [22].
12.1 Using the extensions
+ id="x19-14600012.1">Using the extensions
A sample application using the PSBLAS extensions will contain the following
steps:
@@ -142,7 +142,7 @@ speed of the sparse matrix-vector product with the various data structures inclu
in the library.
12.2 Extensions’ Data Structures
+ id="x19-14700012.2">Extensions’ Data Structures
Access to the facilities provided by the EXT library is mainly achieved through
the data types that are provided within. The data classes are derived from
the base classes in PSBLAS, through the Fortran 2003 mechanism of 17].
The data classes are divided between the general purpose CPU extensions, the
GPU interfaces and the RSB interfaces. In the description we will make use of the
notation introduced in Table 21.
+href="#x19-147001r21">21.
Table 21: Notation for parameters describing a sparse matrix
+class="content">Notation for parameters describing a sparse matrix
@@ -274,7 +274,7 @@ class="td11">
+ id="x19-147002r5">
@@ -283,18 +283,18 @@ src="mat.png" alt="PIC"
width="147" height="147" >
Figure 5: Example of sparse matrix
+class="content">Example of sparse matrix
12.3 CPU-class extensions
+ id="x19-14800012.3">CPU-class extensions
ELLPACK
+ id="x19-149000">ELLPACK
The ELLPACK/ITPACK format (shown in Figure 6) comprises two 2-dimensional
+href="#x19-149001r6">6) comprises two 2-dimensional
arrays AS and JA with
+ id="x19-149001r6">
@@ -325,13 +325,13 @@ width="233" height="233" >
Figure 6: ELLPACK compression of matrix in Figure 5
+href="#x19-147002r5">5
+ id="x19-149002r1">
@@ -341,8 +341,8 @@ href="#x19-146002r5">5
-
i=1,n
t=0
j=1,maxnzr
,j))
end do
) = t
end do
+ id="x19-149010r1">
+ id="x19-149011">
Algorithm 1: Matrix-Vector product in ELL format
@@ -450,7 +450,7 @@ class="cmbx-10"> 1: Matrix-Vector product in ELL format
class="cmmi-10">y = Ax can be computed with the code shown in
Alg. 1; it costs one memory write per outer iteration, plus three memory reads and
+href="#x19-149010r1">1; it costs one memory write per outer iteration, plus three memory reads and
two floating-point operations per inner iteration.
Unless all rows have exactly the same number of nonzeros, some of the coefficients
in the
+ class="enumerate" id="x19-149013x1">
The maximum number of nonzeros per row is not much larger than the
average;
+ class="enumerate" id="x19-149015x2">
The regularity of the data structure allows for faster code, e.g. by allowing
vectorization, thereby offsetting the additional storage requirements.
In the extreme case where the input matrix has one full row, the ELLPACK
@@ -492,7 +492,7 @@ class="cmtt-10">psb_T_ell_sparse_mat
:
Hacked ELLPACK
+ id="x19-150000">Hacked ELLPACK
The hacked ELLPACK (HLL) format alleviates the main problem of the ELLPACK
@@ -558,7 +558,7 @@ format.
+ id="x19-150001r7">
@@ -568,7 +568,7 @@ width="248" height="248" >
Figure 7: Hacked ELLPACK compression of matrix in Figure 5
+href="#x19-147002r5">5
@@ -595,9 +595,9 @@ class="cmtt-10">psb_T_hll_sparse_mat:
Diagonal storage
+ id="x19-151000">Diagonal storage
The DIAgonal (DIA) format (shown in Figure 8) has a 2-dimensional array 8) has a 2-dimensional array AS
containing in each column the coefficients along a diagonal of the matrix, and an
integer array The code to compute the matrix-vector product y = Ax is shown in Alg. 2; it
+href="#x19-151003r2">2; it
costs one memory read per outer iteration, plus three memory reads, one memory
write and two floating-point operations per inner iteration. The accesses to
+ id="x19-151001r8">
@@ -630,13 +630,13 @@ width="248" height="248" >
Figure 8: DIA compression of matrix in Figure 5
+href="#x19-147002r5">5
+ id="x19-151002r2">
@@ -662,9 +662,9 @@ href="#x19-146002r5">5
+ id="x19-151003r2">
+ id="x19-151004">
Algorithm 2: Matrix-Vector product in DIA format
@@ -691,7 +691,7 @@ class="cmtt-10">psb_T_dia_sparse_mat
:
Hacked DIA
+ id="x19-152000">Hacked DIA
Storage by DIAgonals is an attractive option for matrices whose coefficients are
located on a small set of diagonals, since they do away with storing explicitly the
indices and therefore reduce significantly memory traffic. However, having a few
@@ -738,7 +738,7 @@ class="cmti-10">hackOffsets[k]
.
+ id="x19-152001r9">
@@ -748,7 +748,7 @@ width="248" height="248" >
Figure 9: Hacked DIA compression of matrix in Figure 5
+href="#x19-147002r5">5
@@ -793,7 +793,7 @@ class="cmtt-10">psb_T_hdia_sparse_mat:
12.4 CUDA-class extensions
+ id="x19-15300012.4">CUDA-class extensions
For computing with CUDA we define a dual memorization strategy in which each
variable on the CPU (“host”) side has a GPU (“device”) side. When a GPU-type
variable is initialized, the data contained is (usually) the same on both sides. Each
diff --git a/docs/html/userhtmlse13.html b/docs/html/userhtmlse13.html
index fa8fed8f..90d399e0 100644
--- a/docs/html/userhtmlse13.html
+++ b/docs/html/userhtmlse13.html
@@ -16,12 +16,12 @@ href="userhtmlse12.html#tailuserhtmlse12.html" >prev-tail] [tail] [up]
13 CUDA Environment Routines
+ id="x20-15400013">CUDA Environment Routines
psb_cuda_init — Initializes PSBLAS-CUDA environment
+ id="x20-155000">psb_cuda_init — Initializes PSBLAS-CUDA environment
+ id="Q1-20-192">
@@ -64,13 +64,13 @@ class="cmbx-12">Notes
-
+ class="enumerate" id="x20-155002x1">
A call to this routine must precede any other PSBLAS-CUDA call.
psb_cuda_exit — Exit from PSBLAS-CUDA environment
+ id="x20-156000">psb_cuda_exit — Exit from PSBLAS-CUDA environment
+ id="Q1-20-194">
@@ -106,9 +106,9 @@ class="cmbx-10">in.
Specified as: an integer variable.
psb_cuda_DeviceSync — Synchronize CUDA device
+ id="x20-157000">psb_cuda_DeviceSync — Synchronize CUDA device
+ id="Q1-20-196">
@@ -123,9 +123,9 @@ call psb_cuda_DeviceSync()
CUDA-side code, have completed.
psb_cuda_getDeviceCount
+ id="x20-158000">psb_cuda_getDeviceCount
+ id="Q1-20-198">
@@ -136,9 +136,9 @@ ngpus = psb_cuda_getDeviceCount()
Get number of devices available on current computing node.
psb_cuda_getDevice
+ id="x20-159000">psb_cuda_getDevice
+ id="Q1-20-200">
@@ -149,9 +149,9 @@ ngpus = psb_cuda_getDevice()
Get device in use by current process.
psb_cuda_setDevice
+ id="x20-160000">psb_cuda_setDevice
+ id="Q1-20-202">
@@ -165,9 +165,9 @@ info = psb_cuda_setDevice(dev)
Set device to be used by current process.
psb_cuda_DeviceHasUVA
+ id="x20-161000">psb_cuda_DeviceHasUVA
+ id="Q1-20-204">
@@ -178,9 +178,9 @@ hasUva = psb_cuda_DeviceHasUVA()
Returns true if device currently in use supports UVA (Unified Virtual Addressing).
psb_cuda_WarpSize
+ id="x20-162000">psb_cuda_WarpSize
+ id="Q1-20-206">
@@ -191,9 +191,9 @@ nw = psb_cuda_WarpSize()
Returns the warp size.
psb_cuda_MultiProcessors
+ id="x20-163000">psb_cuda_MultiProcessors
+ id="Q1-20-208">
@@ -207,9 +207,9 @@ nmp = psb_cuda_MultiProcessors()
Returns the number of multiprocessors in the CUDA device.
psb_cuda_MaxThreadsPerMP
+ id="x20-164000">psb_cuda_MaxThreadsPerMP
+ id="Q1-20-210">
@@ -220,9 +220,9 @@ nt = psb_cuda_MaxThreadsPerMP()
Returns the maximum number of threads per multiprocessor.
psb_cuda_MaxRegistersPerBlock
+ id="x20-165000">psb_cuda_MaxRegistersPerBlock
+ id="Q1-20-212">
@@ -233,9 +233,9 @@ nr = psb_cuda_MaxRegistersPerBlock()
Returns the maximum number of register per thread block.
psb_cuda_MemoryClockRate
+ id="x20-166000">psb_cuda_MemoryClockRate
+ id="Q1-20-214">
@@ -249,9 +249,9 @@ cl = psb_cuda_MemoryClockRate()
Returns the memory clock rate in KHz, as an integer.
psb_cuda_MemoryBusWidth
+ id="x20-167000">psb_cuda_MemoryBusWidth
+ id="Q1-20-216">
@@ -262,9 +262,9 @@ nb = psb_cuda_MemoryBusWidth()
Returns the memory bus width in bits.
psb_cuda_MemoryPeakBandwidth
+ id="x20-168000">psb_cuda_MemoryPeakBandwidth
+ id="Q1-20-218">
diff --git a/docs/psblas-3.9.pdf b/docs/psblas-3.9.pdf
index 6d104b9d..fe0bb60b 100644
--- a/docs/psblas-3.9.pdf
+++ b/docs/psblas-3.9.pdf
@@ -447,7 +447,7 @@ endobj
/Type /ObjStm
/N 100
/First 928
-/Length 16303
+/Length 16554
>>
stream
403 0 407 48 408 376 411 424 412 766 415 814 416 1030 419 1078 420 1241 423 1283
@@ -458,8 +458,8 @@ stream
504 7435 507 7482 508 7853 511 7900 512 8366 515 8408 516 8489 519 8536 520 8990 523 9037
524 9483 527 9530 528 9986 531 10033 532 10489 535 10536 536 10992 539 11039 540 11505 543 11548
544 11707 547 11755 548 11991 551 12039 552 12260 555 12308 556 12584 559 12632 560 12977 563 13025
-564 13271 567 13319 568 13525 571 13568 572 13697 575 13745 576 14035 579 14078 580 14169 583 14217
-584 14374 587 14422 588 14614 591 14662 592 14816 595 14864 596 15023 599 15066 600 15238 603 15281
+564 13271 567 13319 568 13525 571 13568 572 13697 575 13745 576 14035 579 14083 580 14423 583 14466
+584 14557 587 14605 588 14762 591 14810 592 15002 595 15050 596 15204 599 15252 600 15411 603 15454
% 403 0 obj
<< /S /GoTo /D (subsection.6.24) >>
% 407 0 obj
@@ -633,37 +633,37 @@ stream
% 575 0 obj
(\376\377\0001\0001\000.\0001\000\040\000p\000s\000b\000\137\000k\000r\000y\000l\000o\000v\000\040\000\040\040\024\000\040\000K\000r\000y\000l\000o\000v\000\040\000M\000e\000t\000h\000o\000d\000s\000\040\000D\000r\000i\000v\000e\000r\000\040\000R\000o\000u\000t\000i\000n\000e)
% 576 0 obj
-<< /S /GoTo /D (section.12) >>
+<< /S /GoTo /D (subsection.11.2) >>
% 579 0 obj
-(\376\377\0001\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s)
+(\376\377\0001\0001\000.\0002\000\040\000p\000s\000b\000\137\000r\000i\000c\000h\000a\000r\000d\000s\000o\000n\000\040\000\040\040\024\000\040\000R\000i\000c\000h\000a\000r\000d\000s\000o\000n\000\040\000I\000t\000e\000r\000a\000t\000i\000o\000n\000\040\000D\000r\000i\000v\000e\000r\000\040\000R\000o\000u\000t\000i\000n\000e)
% 580 0 obj
-<< /S /GoTo /D (subsection.12.1) >>
+<< /S /GoTo /D (section.12) >>
% 583 0 obj
-(\376\377\0001\0002\000.\0001\000\040\000U\000s\000i\000n\000g\000\040\000t\000h\000e\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s)
+(\376\377\0001\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s)
% 584 0 obj
-<< /S /GoTo /D (subsection.12.2) >>
+<< /S /GoTo /D (subsection.12.1) >>
% 587 0 obj
-(\376\377\0001\0002\000.\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s\000'\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s)
+(\376\377\0001\0002\000.\0001\000\040\000U\000s\000i\000n\000g\000\040\000t\000h\000e\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s)
% 588 0 obj
-<< /S /GoTo /D (subsection.12.3) >>
+<< /S /GoTo /D (subsection.12.2) >>
% 591 0 obj
-(\376\377\0001\0002\000.\0003\000\040\000C\000P\000U\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s)
+(\376\377\0001\0002\000.\0002\000\040\000E\000x\000t\000e\000n\000s\000i\000o\000n\000s\000'\000\040\000D\000a\000t\000a\000\040\000S\000t\000r\000u\000c\000t\000u\000r\000e\000s)
% 592 0 obj
-<< /S /GoTo /D (subsection.12.4) >>
+<< /S /GoTo /D (subsection.12.3) >>
% 595 0 obj
-(\376\377\0001\0002\000.\0004\000\040\000C\000U\000D\000A\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s)
+(\376\377\0001\0002\000.\0003\000\040\000C\000P\000U\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s)
% 596 0 obj
-<< /S /GoTo /D (section.13) >>
+<< /S /GoTo /D (subsection.12.4) >>
% 599 0 obj
-(\376\377\0001\0003\000\040\000C\000U\000D\000A\000\040\000E\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000R\000o\000u\000t\000i\000n\000e\000s)
+(\376\377\0001\0002\000.\0004\000\040\000C\000U\000D\000A\000-\000c\000l\000a\000s\000s\000\040\000e\000x\000t\000e\000n\000s\000i\000o\000n\000s)
% 600 0 obj
-<< /S /GoTo /D (section*.6) >>
+<< /S /GoTo /D (section.13) >>
% 603 0 obj
-(\376\377\000p\000s\000b\000\137\000c\000u\000d\000a\000\137\000i\000n\000i\000t)
+(\376\377\0001\0003\000\040\000C\000U\000D\000A\000\040\000E\000n\000v\000i\000r\000o\000n\000m\000e\000n\000t\000\040\000R\000o\000u\000t\000i\000n\000e\000s)
endstream
endobj
-662 0 obj
+666 0 obj
<<
/Length 729
>>
@@ -708,7 +708,7 @@ ET
endstream
endobj
-659 0 obj
+663 0 obj
<<
/Type /XObject
/Subtype /Image
@@ -716,14 +716,14 @@ endobj
/Height 480
/BitsPerComponent 8
/ColorSpace /DeviceRGB
-/SMask 669 0 R
+/SMask 673 0 R
/Length 921600
>>
stream
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ÿÿÿþþþýýýýýýýýýþþþÿÿÿÿÿÿþþþþþþþþþþþþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿýýýûûûûûûûûûûûûúúúúúúøøø÷÷÷÷÷÷÷÷÷öööõõõõõõððððððððððððððððððíííùùùþþþÿÿÿ ÿÿÿþþþùùùæææäääááááááááááááááááááÝÝÝÜÜÜÜÜÜÜÜÜÜÜÜÜÜÜÛÛÛÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÖÔÔÔÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÑÍÍÍÌÌÌÌÌÌÌÌÌÌÌÌÌÌÌËËËÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÇÄÄĽ½½½½½½½½½½½½½½½½½ººº¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÈÈÈÿÿÿÿÿÿ ÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿ ÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþ ÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿ ÿÿÿþþþèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿ ÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿ ÿÿÿÿÿÿÞÞÞ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿ ÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿þþþÿÿÿ ÿÿÿÿÿÿÔÔÔ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿ ÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿ ÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿ ÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿ ÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿ ÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿ þþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿ ÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿ ÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿ ÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿ ÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿ ÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿ ÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿ ÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³µµµôôôþþþÿÿÿ ÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿ ÿÿÿûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿ ÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþ ÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿ ÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿ ÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿ ÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿ ÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿ ÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÝÝÝÿÿÿÿÿÿ ÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿ ÿÿÿÿÿÿöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿ ÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿ ÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿ ÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿ ÿÿÿÿÿÿäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿ ÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿ ÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿ ÿÿÿþþþúúú···³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿ ÿÿÿÿÿÿÑÑѳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿ ÿÿÿþþþòòò´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿ ÿÿÿÿÿÿÈÈȳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿ ÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿ ÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿ ÿÿÿÿÿÿßßß³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿ ÿÿÿûûû¹¹¹³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþ ÿÿÿÿÿÿÕÕÕ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿ ÿÿÿþþþöööµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿ ÿÿÿÿÿÿËË˳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿ ÿÿÿþþþííí³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿ ÿÿÿÿÿÿ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿ ÿÿÿþþþâââ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿ þþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···øøøÿÿÿÿÿÿ ÿÿÿÿÿÿÙÙÙ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÏÏÏÿÿÿÿÿÿ ÿÿÿþþþøøø¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³íííþþþÿÿÿ ÿÿÿÿÿÿÏÏϳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÂÂÂþþþÿÿÿ ÿÿÿþþþñññ´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³àààÿÿÿÿÿÿ ÿÿÿÿÿÿÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¸¸¸úúúþþþÿÿÿ ÿÿÿþþþççç³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÒÒÒÿÿÿÿÿÿ ÿÿÿýýý¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ðððþþþÿÿÿ ÿÿÿþþþÜÜܳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄþþþÿÿÿ þþþûûû¸¸¸³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³äääþþþÿÿÿ ÿÿÿÿÿÿÓÓÓ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ºººüüüÿÿÿ ÿÿÿþþþôôô´´´³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÖÖÖÿÿÿÿÿÿ ÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³´´´ôôôþþþÿÿÿ ÿÿÿþþþëëë³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÇÇÇÿÿÿÿÿÿ ÿÿÿþþþÀÀÀ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³èèèþþþÿÿÿ ÿÿÿÿÿÿááá³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¼¼¼ýýýþþþ ÿÿÿûûûººº³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿÿÿÿ ÿÿÿÿÿÿ××׳³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¶¶¶öööþþþÿÿÿ ÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ËËËÿÿÿÿÿÿ ÿÿÿÿÿÿÍÍͳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³êêêÿÿÿÿÿÿ ÿÿÿþþþïïï³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¾¾¾þþþÿÿÿ ÿÿÿþþþÅÅų³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÜÜÜÿÿÿÿÿÿ ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ ÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···üüüûûýüüþýýþýýþüüþüüþüüþüüþÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿûûýòòøòòøòòøòòøòñøòñøéèóäãñäãñäãñãâðâáïàßîÓÑçÓÑçÓÑçÓÑçÑÏæÑÏæÉÇâ¿Þ¿Þ¿Þûûýÿÿÿ ÿÿÿýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÄÄÄýýþÑÎ棟ͣŸÍ£ŸÍ£ŸÍ£ŸÍ™”ȔŔŔŔŔŒŽÄ…€½…€½…€½…€½…€½…€½~x¹vpµvpµvpµvpµvpµvpµhbf`¬f`¬f`¬f`¬f`¬b[ªWP¤WP¤WP¤WP¤WP¤WP¤LDžH@œH@œH@œH@œH@œF>›90”90”90”90”90”90”0') ‹) ‹) ‹) ‹) ‹) ‹„ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ•Æÿÿÿÿÿÿ ÿÿÿÿÿÿààà³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³õõõùùü7.“ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ%‰éèóýýþÿÿÿ ÿÿÿÿÿÿ÷÷÷¶¶¶³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÐÐÐÿÿÿ’ŽÄƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒle¯ÿÿÿÿÿÿ ÿÿÿÿÿÿÚÚÚ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ùùúîíö'ŠƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒÇÅáüüýÿÿÿ ÿÿÿþþþ¾¾¾³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÞÞÞÿÿÿvpµƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒB9˜üüýÿÿÿ ÿÿÿÿÿÿèèè³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³···ûûüÛÚë…ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ œËþþþÿÿÿ ÿÿÿÿÿÿÉÉɳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³éééÿÿÿZS¦ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ) ‹ðï÷üüþÿÿÿ ÿÿÿþþþõõõµµµ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³¿¿¿üüþ½ºÛƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒvqµÿÿÿÿÿÿ ÿÿÿÿÿÿÖÖÖ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³òòòýýþA8˜ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ„ÐÎæüüþÿÿÿ þþþýýý¼¼¼³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÉÉÉþþþŸ›ËƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒMEŸþþþÿÿÿ ÿÿÿþþþäää³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³øøùóòø,#ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒª¦Ñýýþÿÿÿ ÿÿÿÿÿÿÆÆƳ³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³³ÙÙÙÿÿÿ|»ƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒƒ/&Žôôùþþÿ