From 10ec5eafabe405d9ee2ebb1c0be5a9bdca13f9c8 Mon Sep 17 00:00:00 2001 From: tloloum Date: Fri, 26 Jul 2024 09:56:32 +0200 Subject: [PATCH] ELL oacc impl --- openacc/impl/Makefile | 15 ++- openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 | 47 +++++++ openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 | 78 +++++++++++ openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 | 24 ++++ openacc/impl/psb_d_oacc_ell_csmm.F90 | 86 ++++++++++++ openacc/impl/psb_d_oacc_ell_csmv.F90 | 82 ++++++++++++ openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 | 85 ++++++++++++ openacc/impl/psb_d_oacc_ell_mold.F90 | 34 +++++ openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 | 25 ++++ openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 | 24 ++++ openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 | 28 ++++ openacc/impl/psb_d_oacc_ell_scal.F90 | 58 ++++++++ openacc/impl/psb_d_oacc_ell_scals.F90 | 39 ++++++ openacc/impl/psb_d_oacc_ell_vect_mv.F90 | 66 +++++++++ openacc/psb_d_oacc_ell_mat_mod.F90 | 125 ++++++++++++++++++ 15 files changed, 815 insertions(+), 1 deletion(-) create mode 100644 openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_csmm.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_csmv.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_mold.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_scal.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_scals.F90 create mode 100644 openacc/impl/psb_d_oacc_ell_vect_mv.F90 diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 32f104dd..a38c703d 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -70,7 +70,20 @@ psb_z_oacc_csr_mv_from_coo.o \ psb_z_oacc_csr_mv_from_fmt.o \ psb_z_oacc_csr_mold.o \ psb_z_oacc_mlt_v_2.o \ -psb_z_oacc_mlt_v.o +psb_z_oacc_mlt_v.o \ +psb_d_oacc_ell_vect_mv.o \ +psb_d_oacc_ell_inner_vect_sv.o \ +psb_d_oacc_ell_csmm.o \ +psb_d_oacc_ell_csmv.o \ +psb_d_oacc_ell_scals.o \ +psb_d_oacc_ell_scal.o \ +psb_d_oacc_ell_reallocate_nz.o \ +psb_d_oacc_ell_allocate_mnnz.o \ +psb_d_oacc_ell_cp_from_coo.o \ +psb_d_oacc_ell_cp_from_fmt.o \ +psb_d_oacc_ell_mv_from_coo.o \ +psb_d_oacc_ell_mv_from_fmt.o \ +psb_d_oacc_ell_mold.o \ objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 new file mode 100644 index 00000000..4923e12c --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_allocate_mnnz.F90 @@ -0,0 +1,47 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_d_ell_sparse_mat%allocate(m, n, nz_) + + if (.not.allocated(a%val)) then + allocate(a%val(m, nz_)) + allocate(a%ja(m, nz_)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + end if + + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_allocate_mnnz +end submodule psb_d_oacc_ell_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 new file mode 100644 index 00000000..560d3451 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_cp_from_coo.F90 @@ -0,0 +1,78 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 1 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(a%get_nrows(), nz_per_row)) + allocate(a%ja(a%get_nrows(), nz_per_row)) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + end if + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row, row_counts(row) + 1) = value + a%ja(row, row_counts(row) + 1) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(i, j) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_ell_cp_from_coo +end submodule psb_d_oacc_ell_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 new file mode 100644 index 00000000..b62a40d5 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_cp_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_d_oacc_ell_cp_from_fmt +end submodule psb_d_oacc_ell_cp_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_ell_csmm.F90 b/openacc/impl/psb_d_oacc_ell_csmm.F90 new file mode 100644 index 00000000..6515a306 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmm_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_ell_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = 1, nzt + y(i, j) = y(i, j) + alpha * a%val(i, k) * x(a%ja(i, k), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_csmm +end submodule psb_d_oacc_ell_csmm_impl diff --git a/openacc/impl/psb_d_oacc_ell_csmv.F90 b/openacc/impl/psb_d_oacc_ell_csmv.F90 new file mode 100644 index 00000000..8d5e2aaa --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_csmv.F90 @@ -0,0 +1,82 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_csmv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, nzt + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_ell_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_ell_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nzt = a%nzt + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, m + do j = 1, nzt + y(i) = y(i) + alpha * a%val(i, j) * x(a%ja(i, j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_csmv +end submodule psb_d_oacc_ell_csmv_impl diff --git a/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 new file mode 100644 index 00000000..2c5563ce --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_inner_vect_sv.F90 @@ -0,0 +1,85 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_ell_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nzt + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_oacc) + select type(yy => y) + type is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nzt = a%nzt + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val, 1) + do j = 1, nzt + yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'ell_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_d_oacc_ell_inner_vect_sv +end submodule psb_d_oacc_ell_inner_vect_sv_impl diff --git a/openacc/impl/psb_d_oacc_ell_mold.F90 b/openacc/impl/psb_d_oacc_ell_mold.F90 new file mode 100644 index 00000000..767e7f13 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mold_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mold(a, b, info) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'ell_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_ell_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_ell_mold +end submodule psb_d_oacc_ell_mold_impl diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 new file mode 100644 index 00000000..688d182b --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mv_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_ell_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_ell_mv_from_coo +end submodule psb_d_oacc_ell_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 new file mode 100644 index 00000000..8bda6e6e --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_mv_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag) + end select + + end subroutine psb_d_oacc_ell_mv_from_fmt +end submodule psb_d_oacc_ell_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 new file mode 100644 index 00000000..11332472 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_reallocate_nz.F90 @@ -0,0 +1,28 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_oacc_ell_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_ell_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_reallocate_nz +end submodule psb_d_oacc_ell_reallocate_nz_impl diff --git a/openacc/impl/psb_d_oacc_ell_scal.F90 b/openacc/impl/psb_d_oacc_ell_scal.F90 new file mode 100644 index 00000000..39948d5f --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_scal.F90 @@ -0,0 +1,58 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scal_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_scal(d, a, info, side) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, m, nzt + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(a%ja(i, j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d(j) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_scal +end submodule psb_d_oacc_ell_scal_impl diff --git a/openacc/impl/psb_d_oacc_ell_scals.F90 b/openacc/impl/psb_d_oacc_ell_scals.F90 new file mode 100644 index 00000000..a6292b72 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scals_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_scals(d, a, info) + implicit none + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j, nzt, m + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + m = a%get_nrows() + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, m + do j = 1, nzt + a%val(i, j) = a%val(i, j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_ell_scals +end submodule psb_d_oacc_ell_scals_impl diff --git a/openacc/impl/psb_d_oacc_ell_vect_mv.F90 b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 new file mode 100644 index 00000000..ebcdb405 --- /dev/null +++ b/openacc/impl/psb_d_oacc_ell_vect_mv.F90 @@ -0,0 +1,66 @@ +submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nzt + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nzt = a%nzt + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, n, nzt, alpha, a%val, a%ja, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, n, nzt, alpha, val, ja, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, n, nzt + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:,:), x(:), y(:) + integer(psb_ipk_) :: ja(:,:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = 1, nzt + if (ja(i,j) > 0) then + tmp = tmp + val(i,j) * x(ja(i,j)) + end if + end do + y(i) = alpha * tmp + beta * y(i) + end do + end do + end subroutine inner_spmv + + end subroutine psb_d_oacc_ell_vect_mv +end submodule psb_d_oacc_ell_vect_mv_impl diff --git a/openacc/psb_d_oacc_ell_mat_mod.F90 b/openacc/psb_d_oacc_ell_mat_mod.F90 index 846707af..8c5946ba 100644 --- a/openacc/psb_d_oacc_ell_mat_mod.F90 +++ b/openacc/psb_d_oacc_ell_mat_mod.F90 @@ -22,9 +22,134 @@ module psb_d_oacc_ell_mat_mod procedure, pass(a) :: sync_space => d_oacc_ell_sync_space procedure, pass(a) :: sync => d_oacc_ell_sync procedure, pass(a) :: free => d_oacc_ell_free + procedure, pass(a) :: vect_mv => psb_d_oacc_ell_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_ell_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_ell_csmm + procedure, pass(a) :: csmv => psb_d_oacc_ell_csmv + procedure, pass(a) :: scals => psb_d_oacc_ell_scals + procedure, pass(a) :: scalv => psb_d_oacc_ell_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_ell_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_ell_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_ell_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_ell_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_ell_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_ell_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_ell_mold end type psb_d_oacc_ell_sparse_mat + interface + module subroutine psb_d_oacc_ell_mold(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mold + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_ell_mv_from_fmt(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_vect_mv + end interface + + interface + module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_ell_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmm + end interface + + interface + module subroutine psb_d_oacc_ell_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_ell_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_ell_csmv + end interface + + interface + module subroutine psb_d_oacc_ell_scals(d, a, info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_scals + end interface + + interface + module subroutine psb_d_oacc_ell_scal(d,a,info,side) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_ell_scal + end interface + + interface + module subroutine psb_d_oacc_ell_reallocate_nz(nz,a) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_ell_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_ell_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_ell_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_ell_cp_from_coo(a,b,info) + class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_ell_cp_from_coo + end interface + contains subroutine d_oacc_ell_free(a)