From e6fa1d17a2f134b792f7968d2a95bb61a4a95e84 Mon Sep 17 00:00:00 2001 From: tloloum Date: Tue, 30 Jul 2024 14:25:01 +0200 Subject: [PATCH] oacc hll --- openacc/Makefile | 7 +- openacc/impl/Makefile | 13 + openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 | 53 +++ openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 | 85 +++++ openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 | 24 ++ openacc/impl/psb_d_oacc_hll_csmm.F90 | 86 +++++ openacc/impl/psb_d_oacc_hll_csmv.F90 | 83 +++++ openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 | 86 +++++ openacc/impl/psb_d_oacc_hll_mold.F90 | 34 ++ openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 | 25 ++ openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 | 24 ++ openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 | 29 ++ openacc/impl/psb_d_oacc_hll_scal.F90 | 59 +++ openacc/impl/psb_d_oacc_hll_scals.F90 | 39 ++ openacc/impl/psb_d_oacc_hll_vect_mv.F90 | 67 ++++ openacc/psb_d_oacc_hll_mat_mod.F90 | 352 ++++++++++++++++++ 16 files changed, 1063 insertions(+), 3 deletions(-) create mode 100644 openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_csmm.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_csmv.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_mold.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_scal.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_scals.F90 create mode 100644 openacc/impl/psb_d_oacc_hll_vect_mv.F90 create mode 100644 openacc/psb_d_oacc_hll_mat_mod.F90 diff --git a/openacc/Makefile b/openacc/Makefile index cba6b1f7..e7e99072 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -22,7 +22,7 @@ FOBJS= psb_i_oacc_vect_mod.o \ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ - psb_d_oacc_ell_mat_mod.o \ + psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o\ psb_oacc_mod.o psb_oacc_env_mod.o @@ -49,7 +49,7 @@ psb_oacc_mod.o : psb_i_oacc_vect_mod.o \ psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \ psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \ psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \ - psb_d_oacc_ell_mat_mod.o \ + psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \ psb_oacc_env_mod.o psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ @@ -61,10 +61,11 @@ psb_d_oacc_csr_mat_mod.o: psb_d_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o: psb_c_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o: psb_z_oacc_vect_mod.o psb_d_oacc_ell_mat_mod.o: psb_d_oacc_vect_mod.o +psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o clean: cclean iclean - /bin/rm -f $(FOBJS) *$(.mod) *.a + /bin/rm -f $(FOBJS) *$(.mod) *.a *.smod veryclean: clean cclean: /bin/rm -f $(COBJS) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index a38c703d..c0a157e8 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -84,6 +84,19 @@ psb_d_oacc_ell_cp_from_fmt.o \ psb_d_oacc_ell_mv_from_coo.o \ psb_d_oacc_ell_mv_from_fmt.o \ psb_d_oacc_ell_mold.o \ +psb_d_oacc_hll_mold.o \ +psb_d_oacc_hll_mv_from_fmt.o \ +psb_d_oacc_hll_mv_from_coo.o \ +psb_d_oacc_hll_cp_from_fmt.o \ +psb_d_oacc_hll_cp_from_coo.o \ +psb_d_oacc_hll_allocate_mnnz.o \ +psb_d_oacc_hll_reallocate_nz.o \ +psb_d_oacc_hll_scal.o \ +psb_d_oacc_hll_scals.o \ +psb_d_oacc_hll_csmv.o \ +psb_d_oacc_hll_csmm.o \ +psb_d_oacc_hll_inner_vect_sv.o \ +psb_d_oacc_hll_vect_mv.o \ objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 new file mode 100644 index 00000000..909ee90b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_allocate_mnnz.F90 @@ -0,0 +1,53 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_allocate_mnnz(m, n, a, nz) + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnnz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(nz)) then + nz_ = nz + else + nz_ = 10 + end if + + call a%psb_d_hll_sparse_mat%allocate(m, n, nz_) + + hksz = a%hksz + nhacks = (m + hksz - 1) / hksz + + if (.not.allocated(a%val)) then + allocate(a%val(nz_ * m)) + allocate(a%ja(nz_ * m)) + allocate(a%irn(m)) + allocate(a%idiag(m)) + allocate(a%hkoffs(nhacks)) + end if + + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + a%hkoffs = 0 + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_allocate_mnnz +end submodule psb_d_oacc_hll_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 new file mode 100644 index 00000000..fbe793d5 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_cp_from_coo.F90 @@ -0,0 +1,85 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: i, j, k, row, col, nz_per_row + real(psb_dpk_) :: value + integer(psb_ipk_), allocatable :: row_counts(:) + integer(psb_ipk_) :: hacksize, nza + + info = psb_success_ + hacksize = 32 ! Assuming a default hack size of 32 + + call a%set_nrows(b%get_nrows()) + call a%set_ncols(b%get_ncols()) + nz_per_row = a%nzt + + if (.not.allocated(a%val)) then + allocate(a%val(nz_per_row * a%get_nrows())) + allocate(a%ja(nz_per_row * a%get_nrows())) + allocate(a%irn(a%get_nrows())) + allocate(a%idiag(a%get_nrows())) + allocate(a%hkoffs((a%get_nrows() + hacksize - 1) / hacksize)) + end if + a%val = 0.0_psb_dpk_ + a%ja = -1 + a%irn = 0 + a%idiag = 0 + + allocate(row_counts(a%get_nrows())) + row_counts = 0 + + nza = b%get_nzeros() + + !$acc parallel loop present(b, a, row_counts) + do k = 1, nza + row = b%ia(k) + col = b%ja(k) + value = b%val(k) + if (row_counts(row) < nz_per_row) then + a%val(row_counts(row) + 1 + (row - 1) * nz_per_row) = value + a%ja(row_counts(row) + 1 + (row - 1) * nz_per_row) = col + row_counts(row) = row_counts(row) + 1 + else + info = psb_err_invalid_mat_state_ + goto 9999 + end if + end do + + a%irn = row_counts + + !$acc parallel loop present(a) + do i = 1, a%get_nrows() + do j = 1, nz_per_row + if (a%ja(j + (i - 1) * nz_per_row) == i) then + a%idiag(i) = j + exit + end if + end do + end do + + ! Calculate hkoffs for HLL format + !$acc parallel loop present(a) + do i = 1, size(a%hkoffs) + a%hkoffs(i) = (i - 1) * hacksize + end do + + deallocate(row_counts) + + call a%set_dev() + if (info /= 0) goto 9999 + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_hll_cp_from_coo +end submodule psb_d_oacc_hll_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 new file mode 100644 index 00000000..fb99737c --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_cp_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_cp_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_d_oacc_hll_cp_from_fmt +end submodule psb_d_oacc_hll_cp_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_hll_csmm.F90 b/openacc/impl/psb_d_oacc_hll_csmm.F90 new file mode 100644 index 00000000..2dd6b53b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_csmm.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmm_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, k, nxy, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_hll_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + nhacks = (a%get_nrows() + a%hksz - 1) / a%hksz + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop present(a, x, y) + do j = 1, nxy + do k = 1, nhacks + do i = a%hkoffs(k), a%hkoffs(k + 1) - 1 + y(a%irn(i), j) = y(a%irn(i), j) + alpha * a%val(i) * x(a%ja(i), j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_csmm +end submodule psb_d_oacc_hll_csmm_impl diff --git a/openacc/impl/psb_d_oacc_hll_csmv.F90 b/openacc/impl/psb_d_oacc_hll_csmv.F90 new file mode 100644 index 00000000..504cad19 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_csmv.F90 @@ -0,0 +1,83 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_csmv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n, hksz, nhacks + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_hll_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_hll_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + y(a%irn(j)) = y(a%irn(j)) + alpha * a%val(j) * x(a%ja(j)) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_csmv +end submodule psb_d_oacc_hll_csmv_impl diff --git a/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 new file mode 100644 index 00000000..ae1c3c94 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_inner_vect_sv.F90 @@ -0,0 +1,86 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_hll_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i, j, nhacks, hksz + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_oacc) + select type(yy => y) + type is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + !$acc parallel loop present(a, xx, yy) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i+1) - 1 + yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j)) + end do + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'hll_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + end subroutine psb_d_oacc_hll_inner_vect_sv +end submodule psb_d_oacc_hll_inner_vect_sv_impl diff --git a/openacc/impl/psb_d_oacc_hll_mold.F90 b/openacc/impl/psb_d_oacc_hll_mold.F90 new file mode 100644 index 00000000..89ead65b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_mold.F90 @@ -0,0 +1,34 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mold_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_mold(a, b, info) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'hll_mold' + logical, parameter :: debug = .false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_hll_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + +9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_hll_mold +end submodule psb_d_oacc_hll_mold_impl diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 new file mode 100644 index 00000000..7bf22c13 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_mv_from_coo.F90 @@ -0,0 +1,25 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_mv_from_coo(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_hll_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + + return + +9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_hll_mv_from_coo +end submodule psb_d_oacc_hll_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 new file mode 100644 index 00000000..e6615365 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_mv_from_fmt.F90 @@ -0,0 +1,24 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_mv_from_fmt(a, b, info) + implicit none + + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irn, a%idiag, a%hkoffs) + end select + + end subroutine psb_d_oacc_hll_mv_from_fmt +end submodule psb_d_oacc_hll_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 new file mode 100644 index 00000000..412409d1 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_reallocate_nz.F90 @@ -0,0 +1,29 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_reallocate_nz(nz, a) + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_oacc_hll_reallocate_nz' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: hksz, nhacks + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_hll_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_reallocate_nz +end submodule psb_d_oacc_hll_reallocate_nz_impl diff --git a/openacc/impl/psb_d_oacc_hll_scal.F90 b/openacc/impl/psb_d_oacc_hll_scal.F90 new file mode 100644 index 00000000..50210f4b --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_scal.F90 @@ -0,0 +1,59 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scal_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_scal(d, a, info, side) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1 + a%val(j) = a%val(j) * d(k) + end do + end do + else if (side == 'R') then + !$acc parallel loop collapse(2) present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop collapse(2) present(a, d) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1) + end do + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_scal +end submodule psb_d_oacc_hll_scal_impl diff --git a/openacc/impl/psb_d_oacc_hll_scals.F90 b/openacc/impl/psb_d_oacc_hll_scals.F90 new file mode 100644 index 00000000..ccb6b1b8 --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_scals.F90 @@ -0,0 +1,39 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scals_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_scals(d, a, info) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'scal' + integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + hksz = a%hksz + nhacks = (a%get_nrows() + hksz - 1) / hksz + nzt = a%nzt + + !$acc parallel loop collapse(2) present(a) + do i = 1, nhacks + do j = a%hkoffs(i), a%hkoffs(i + 1) - 1 + a%val(j) = a%val(j) * d + end do + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_hll_scals +end submodule psb_d_oacc_hll_scals_impl diff --git a/openacc/impl/psb_d_oacc_hll_vect_mv.F90 b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 new file mode 100644 index 00000000..875b646f --- /dev/null +++ b/openacc/impl/psb_d_oacc_hll_vect_mv.F90 @@ -0,0 +1,67 @@ +submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + implicit none + + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + integer(psb_ipk_) :: m, n, nhacks, hksz + + info = psb_success_ + m = a%get_nrows() + n = a%get_ncols() + nhacks = size(a%hkoffs) - 1 + hksz = a%hksz + + if ((n /= size(x%v)) .or. (m /= size(y%v))) then + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return + end if + + if (a%is_host()) call a%sync() + if (x%is_host()) call x%sync() + if (y%is_host()) call y%sync() + + call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info) + call y%set_dev() + + contains + + subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info) + implicit none + integer(psb_ipk_) :: m, nhacks, hksz + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), hkoffs(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, idx, k + real(psb_dpk_) :: tmp + + info = 0 + + !$acc parallel loop present(val, ja, hkoffs, x, y) + do i = 1, nhacks + do k = 0, hksz - 1 + idx = hkoffs(i) + k + if (idx <= hkoffs(i + 1) - 1) then + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = hkoffs(i) + k, hkoffs(i + 1) - 1, hksz + if (ja(j) > 0) then + tmp = tmp + val(j) * x(ja(j)) + end if + end do + y(k + 1 + (i - 1) * hksz) = alpha * tmp + beta * y(k + 1 + (i - 1) * hksz) + end if + end do + end do + end subroutine inner_spmv + + end subroutine psb_d_oacc_hll_vect_mv +end submodule psb_d_oacc_hll_vect_mv_impl diff --git a/openacc/psb_d_oacc_hll_mat_mod.F90 b/openacc/psb_d_oacc_hll_mat_mod.F90 new file mode 100644 index 00000000..530af94a --- /dev/null +++ b/openacc/psb_d_oacc_hll_mat_mod.F90 @@ -0,0 +1,352 @@ +module psb_d_oacc_hll_mat_mod + use iso_c_binding + use psb_d_mat_mod + use psb_d_hll_mat_mod + use psb_d_oacc_vect_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_hll_sparse_mat) :: psb_d_oacc_hll_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_hll_get_fmt + procedure, pass(a) :: sizeof => d_oacc_hll_sizeof + procedure, pass(a) :: is_host => d_oacc_hll_is_host + procedure, pass(a) :: is_sync => d_oacc_hll_is_sync + procedure, pass(a) :: is_dev => d_oacc_hll_is_dev + procedure, pass(a) :: set_host => d_oacc_hll_set_host + procedure, pass(a) :: set_sync => d_oacc_hll_set_sync + procedure, pass(a) :: set_dev => d_oacc_hll_set_dev + procedure, pass(a) :: sync_space => d_oacc_hll_sync_space + procedure, pass(a) :: sync => d_oacc_hll_sync + procedure, pass(a) :: free => d_oacc_hll_free + procedure, pass(a) :: vect_mv => psb_d_oacc_hll_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_hll_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_hll_csmm + procedure, pass(a) :: csmv => psb_d_oacc_hll_csmv + procedure, pass(a) :: scals => psb_d_oacc_hll_scals + procedure, pass(a) :: scalv => psb_d_oacc_hll_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_hll_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_hll_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_hll_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_hll_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_hll_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_hll_mv_from_fmt + procedure, pass(a) :: mold => psb_d_oacc_hll_mold + + end type psb_d_oacc_hll_sparse_mat + + interface + module subroutine psb_d_oacc_hll_mold(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mold + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_hll_mv_from_fmt(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_vect_mv + end interface + + interface + module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_hll_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmm + end interface + + interface + module subroutine psb_d_oacc_hll_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_hll_csmv + end interface + + interface + module subroutine psb_d_oacc_hll_scals(d, a, info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_scals + end interface + + interface + module subroutine psb_d_oacc_hll_scal(d,a,info,side) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_hll_scal + end interface + + interface + module subroutine psb_d_oacc_hll_reallocate_nz(nz,a) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_hll_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_hll_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_hll_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_hll_cp_from_coo(a,b,info) + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_hll_cp_from_coo + end interface + + contains + + subroutine d_oacc_hll_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irn)) then + !$acc exit data delete(a%irn) + end if + if (allocated(a%idiag)) then + !$acc exit data delete(a%idiag) + end if + if (allocated(a%hkoffs)) then + !$acc exit data delete(a%hkoffs) + end if + + call a%psb_d_hll_sparse_mat%free() + + return + end subroutine d_oacc_hll_free + + function d_oacc_hll_sizeof(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%ja) + res = res + psb_sizeof_ip * size(a%irn) + res = res + psb_sizeof_ip * size(a%idiag) + res = res + psb_sizeof_ip * size(a%hkoffs) + end function d_oacc_hll_sizeof + + + + function d_oacc_hll_is_host(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_hll_is_host + + function d_oacc_hll_is_sync(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_hll_is_sync + + function d_oacc_hll_is_dev(a) result(res) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_hll_is_dev + + subroutine d_oacc_hll_set_host(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_hll_set_host + + subroutine d_oacc_hll_set_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_hll_set_sync + + subroutine d_oacc_hll_set_dev(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_hll_set_dev + + function d_oacc_hll_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'HLL_oacc' + end function d_oacc_hll_get_fmt + + subroutine d_oacc_hll_sync_space(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a + + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irn)) then + call i_oacc_create_dev_scalar(a%irn) + end if + if (allocated(a%idiag)) then + call i_oacc_create_dev_scalar(a%idiag) + end if + if (allocated(a%hkoffs)) then + call i_oacc_create_dev_scalar(a%hkoffs) + end if + + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + + subroutine i_oacc_create_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev_scalar + + end subroutine d_oacc_hll_sync_space + + + subroutine d_oacc_hll_sync(a) + implicit none + class(psb_d_oacc_hll_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_hll_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_hll_to_host(a%val) + call i_oacc_hll_to_host(a%ja) + call i_oacc_hll_to_host_scalar(a%irn) + call i_oacc_hll_to_host_scalar(a%idiag) + call i_oacc_hll_to_host_scalar(a%hkoffs) + else if (a%is_host()) then + call d_oacc_hll_to_dev(a%val) + call i_oacc_hll_to_dev(a%ja) + call i_oacc_hll_to_dev_scalar(a%irn) + call i_oacc_hll_to_dev_scalar(a%idiag) + call i_oacc_hll_to_dev_scalar(a%hkoffs) + end if + call tmpa%set_sync() + end subroutine d_oacc_hll_sync + + subroutine d_oacc_hll_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_hll_to_host + + subroutine d_oacc_hll_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host + + subroutine i_oacc_hll_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev + + subroutine i_oacc_hll_to_host_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_hll_to_host_scalar + + subroutine i_oacc_hll_to_dev_scalar(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_hll_to_dev_scalar + + +end module psb_d_oacc_hll_mat_mod \ No newline at end of file