From 7006665d823f9ca3efcbd929e933fe0cc956adc0 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Fri, 12 Jul 2024 16:34:48 +0200 Subject: [PATCH] Introduce submodules, adjust Makefile --- openacc/impl/Makefile | 12 + openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 27 +- openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 25 +- openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 21 +- openacc/impl/psb_d_oacc_csr_csmm.F90 | 40 +- openacc/impl/psb_d_oacc_csr_csmv.F90 | 40 +- openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 33 +- openacc/impl/psb_d_oacc_csr_mold.F90 | 24 +- openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 27 +- openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 21 +- openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 23 +- openacc/impl/psb_d_oacc_csr_scal.F90 | 27 +- openacc/impl/psb_d_oacc_csr_scals.F90 | 46 +- openacc/impl/psb_d_oacc_csr_vect_mv.F90 | 66 +- openacc/impl/psb_d_oacc_mlt_v.f90 | 57 +- openacc/impl/psb_d_oacc_mlt_v_2.f90 | 84 +-- openacc/psb_d_oacc_csr_mat_mod.F90 | 689 +++++++++--------- openacc/psb_d_oacc_vect_mod.F90 | 14 +- 18 files changed, 645 insertions(+), 631 deletions(-) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 64879eb6..56df9402 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -2,6 +2,7 @@ include ../../Make.inc LIBDIR=../../lib INCDIR=../../include MODDIR=../../modules +UP=.. # # Compilers and such # @@ -22,6 +23,17 @@ objs: $(OBJS) lib: objs ar cur ../$(LIBNAME) $(OBJS) + +psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ + psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ + psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ + psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ + psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \ + psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o + +psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o + + clean: /bin/rm -f $(OBJS) diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 index a0b8abe0..5281edee 100644 --- a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 +++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_allocate_mnnz +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_allocate_mnnz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) implicit none integer(psb_ipk_), intent(in) :: m, n class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a @@ -9,26 +10,26 @@ subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) integer(psb_ipk_) :: err_act, nz_ character(len=20) :: name='allocate_mnz' logical, parameter :: debug=.false. - + call psb_erractionsave(err_act) info = psb_success_ - + call a%psb_d_csr_sparse_mat%allocate(m, n, nz) - + if (.not.allocated(a%val)) then allocate(a%val(nz)) allocate(a%ja(nz)) allocate(a%irp(m+1)) end if - + call a%set_dev() if (info /= 0) goto 9999 - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - -end subroutine psb_d_oacc_csr_allocate_mnnz - \ No newline at end of file + + end subroutine psb_d_oacc_csr_allocate_mnnz +end submodule psb_d_oacc_csr_allocate_mnnz_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 index c84c5876..ec92f618 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -1,25 +1,26 @@ -subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_coo +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) if (info /= 0) goto 9999 - + call a%set_dev() if (info /= 0) goto 9999 - + return - - 9999 continue + +9999 continue info = psb_err_alloc_dealloc_ return - + end subroutine psb_d_oacc_csr_cp_from_coo - \ No newline at end of file +end submodule psb_d_oacc_csr_cp_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 index 2eae41d7..37541ea9 100644 --- a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -1,23 +1,24 @@ -subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_fmt +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(in) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + select type(b) type is (psb_d_coo_sparse_mat) call a%cp_from_coo(b, info) class default call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) if (info /= 0) return - + !$acc update device(a%val, a%ja, a%irp) end select - -end subroutine psb_d_oacc_csr_cp_from_fmt - \ No newline at end of file + + end subroutine psb_d_oacc_csr_cp_from_fmt +end submodule psb_d_oacc_csr_cp_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90 index 4b404ad1..6ab87175 100644 --- a/openacc/impl/psb_d_oacc_csr_csmm.F90 +++ b/openacc/impl/psb_d_oacc_csr_csmm.F90 @@ -1,8 +1,7 @@ -subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - use psb_base_mod - !use elldev_mod - !use psb_vectordev_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmm_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta @@ -10,30 +9,30 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) real(psb_dpk_), intent(inout) :: y(:,:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - + character :: trans_ integer(psb_ipk_) :: i, j, m, n,k, nxy logical :: tra integer(psb_ipk_) :: err_act character(len=20) :: name = 'd_oacc_csmm' logical, parameter :: debug = .false. - + info = psb_success_ call psb_erractionsave(err_act) - + if (present(trans)) then trans_ = trans else trans_ = 'N' end if - + if (.not.a%is_asb()) then info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - + if (tra) then m = a%get_ncols() n = a%get_nrows() @@ -41,31 +40,31 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) n = a%get_ncols() m = a%get_nrows() end if - + if (size(x,1) < n) then info = 36 call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) goto 9999 end if - + if (size(y,1) < m) then info = 36 call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) goto 9999 end if - + if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) else nxy = min(size(x,2), size(y,2)) - + !$acc parallel loop collapse(2) present(a, x, y) do j = 1, nxy do i = 1, m y(i,j) = beta * y(i,j) end do end do - + !$acc parallel loop collapse(2) present(a, x, y) do j = 1, nxy do i = 1, n @@ -75,12 +74,13 @@ subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) end do end do endif - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_csmm - +end submodule psb_d_oacc_csr_csmm_impl + diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90 index 65acc8e4..205a7a59 100644 --- a/openacc/impl/psb_d_oacc_csr_csmv.F90 +++ b/openacc/impl/psb_d_oacc_csr_csmv.F90 @@ -1,8 +1,7 @@ -subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - use psb_base_mod - !use elldev_mod - !use psb_vectordev_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_csmv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta @@ -10,31 +9,31 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) real(psb_dpk_), intent(inout) :: y(:) integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - + character :: trans_ integer(psb_ipk_) :: i, j, m, n logical :: tra integer(psb_ipk_) :: err_act character(len=20) :: name = 'd_oacc_csmv' logical, parameter :: debug = .false. - + call psb_erractionsave(err_act) info = psb_success_ - + if (present(trans)) then trans_ = trans else trans_ = 'N' end if - + if (.not.a%is_asb()) then info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif - + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - + if (tra) then m = a%get_ncols() n = a%get_nrows() @@ -42,19 +41,19 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) n = a%get_ncols() m = a%get_nrows() end if - + if (size(x,1) < n) then info = 36 call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) goto 9999 end if - + if (size(y,1) < m) then info = 36 call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) goto 9999 end if - + if (tra) then call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) else @@ -62,7 +61,7 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) do i = 1, m y(i) = beta * y(i) end do - + !$acc parallel loop present(a, x, y) do i = 1, n do j = a%irp(i), a%irp(i+1) - 1 @@ -70,12 +69,13 @@ subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) end do end do endif - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_csmv - +end submodule psb_d_oacc_csr_csmv_impl + diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 index a15a710e..5f0ef4a0 100644 --- a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 +++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 @@ -1,16 +1,14 @@ -subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) - use psb_base_mod - ! use elldev_mod - ! use psb_vectordev_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv - use psb_d_oacc_vect_mod +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_inner_vect_sv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x, y integer(psb_ipk_), intent(out) :: info character, optional, intent(in) :: trans - + real(psb_dpk_), allocatable :: rx(:), ry(:) logical :: tra character :: trans_ @@ -18,24 +16,24 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) character(len=20) :: name = 'd_oacc_csr_inner_vect_sv' logical, parameter :: debug = .false. integer(psb_ipk_) :: i - + call psb_get_erraction(err_act) info = psb_success_ - + if (present(trans)) then trans_ = trans else trans_ = 'N' end if - + if (.not.a%is_asb()) then info = psb_err_invalid_mat_state_ call psb_errpush(info, name) goto 9999 endif - + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') - + if (tra .or. (beta /= dzero)) then call x%sync() call y%sync() @@ -68,17 +66,18 @@ subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) call y%bld(ry) end select endif - + if (info /= psb_success_) then info = psb_err_from_subroutine_ call psb_errpush(info, name, a_err = 'csrg_vect_sv') goto 9999 endif - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return end subroutine psb_d_oacc_csr_inner_vect_sv - +end submodule psb_d_oacc_csr_inner_vect_sv_impl + diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90 index 08598cb6..dc9ff711 100644 --- a/openacc/impl/psb_d_oacc_csr_mold.F90 +++ b/openacc/impl/psb_d_oacc_csr_mold.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_mold(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mold +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mold_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mold(a, b, info) implicit none class(psb_d_oacc_csr_sparse_mat), intent(in) :: a class(psb_d_base_sparse_mat), intent(inout), allocatable :: b @@ -8,26 +9,27 @@ subroutine psb_d_oacc_csr_mold(a, b, info) integer(psb_ipk_) :: err_act character(len=20) :: name='csr_mold' logical, parameter :: debug=.false. - + call psb_get_erraction(err_act) - + info = 0 if (allocated(b)) then call b%free() deallocate(b, stat=info) end if if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info) - + if (info /= psb_success_) then info = psb_err_alloc_dealloc_ call psb_errpush(info, name) goto 9999 end if return - - 9999 call psb_error_handler(err_act) - + +9999 call psb_error_handler(err_act) + return - + end subroutine psb_d_oacc_csr_mold - \ No newline at end of file +end submodule psb_d_oacc_csr_mold_impl + diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 index 5f4a6b41..2ed9b032 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -1,24 +1,25 @@ -subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_coo +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_coo_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_coo_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) if (info /= 0) goto 9999 - + !$acc update device(a%val, a%ja, a%irp) - + return - - 9999 continue + +9999 continue info = psb_err_alloc_dealloc_ return - -end subroutine psb_d_oacc_csr_mv_from_coo - \ No newline at end of file + + end subroutine psb_d_oacc_csr_mv_from_coo +end submodule psb_d_oacc_csr_mv_from_coo_impl diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 index 16a4636b..197ed911 100644 --- a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -1,23 +1,24 @@ -subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_fmt +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_fmt_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) implicit none - + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a class(psb_d_base_sparse_mat), intent(inout) :: b integer(psb_ipk_), intent(out) :: info - + info = psb_success_ - + select type(b) type is (psb_d_coo_sparse_mat) call a%mv_from_coo(b, info) class default call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) if (info /= 0) return - + !$acc update device(a%val, a%ja, a%irp) end select - -end subroutine psb_d_oacc_csr_mv_from_fmt - \ No newline at end of file + + end subroutine psb_d_oacc_csr_mv_from_fmt +end submodule psb_d_oacc_csr_mv_from_fmt_impl diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 index 7a6723d0..c345f681 100644 --- a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 +++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_reallocate_nz(nz, a) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_reallocate_nz +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_reallocate_nz_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_reallocate_nz(nz, a) implicit none integer(psb_ipk_), intent(in) :: nz class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a @@ -8,20 +9,20 @@ subroutine psb_d_oacc_csr_reallocate_nz(nz, a) integer(psb_ipk_) :: err_act character(len=20) :: name='d_oacc_csr_reallocate_nz' logical, parameter :: debug=.false. - + call psb_erractionsave(err_act) info = psb_success_ - + call a%psb_d_csr_sparse_mat%reallocate(nz) - + call a%set_dev() if (info /= 0) goto 9999 - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_reallocate_nz - \ No newline at end of file +end submodule psb_d_oacc_csr_reallocate_nz_impl diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90 index b2bf12e8..cc693fa2 100644 --- a/openacc/impl/psb_d_oacc_csr_scal.F90 +++ b/openacc/impl/psb_d_oacc_csr_scal.F90 @@ -1,22 +1,23 @@ -subroutine psb_d_oacc_csr_scal(d, a, info, side) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scal +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scal_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_scal(d, a, info, side) implicit none class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a real(psb_dpk_), intent(in) :: d(:) integer(psb_ipk_), intent(out) :: info character, intent(in), optional :: side - + integer(psb_ipk_) :: err_act character(len=20) :: name='scal' logical, parameter :: debug=.false. integer(psb_ipk_) :: i, j - + info = psb_success_ call psb_erractionsave(err_act) - + if (a%is_host()) call a%sync() - + if (present(side)) then if (side == 'L') then !$acc parallel loop present(a, d) @@ -39,14 +40,14 @@ subroutine psb_d_oacc_csr_scal(d, a, info, side) a%val(i) = a%val(i) * d(i) end do end if - + call a%set_dev() - + call psb_erractionrestore(err_act) return - - 9999 call psb_error_handler(err_act) + +9999 call psb_error_handler(err_act) return - + end subroutine psb_d_oacc_csr_scal - \ No newline at end of file +end submodule psb_d_oacc_csr_scal_impl diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90 index 8c77f647..157355d8 100644 --- a/openacc/impl/psb_d_oacc_csr_scals.F90 +++ b/openacc/impl/psb_d_oacc_csr_scals.F90 @@ -1,32 +1,34 @@ -subroutine psb_d_oacc_csr_scals(d, a, info) +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scals_impl use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scals - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info +contains + module subroutine psb_d_oacc_csr_scals(d, a, info) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: err_act - character(len=20) :: name='scal' - logical, parameter :: debug=.false. - integer(psb_ipk_) :: i + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i - info = psb_success_ - call psb_erractionsave(err_act) + info = psb_success_ + call psb_erractionsave(err_act) - if (a%is_host()) call a%sync() + if (a%is_host()) call a%sync() - !$acc parallel loop present(a) - do i = 1, size(a%val) - a%val(i) = a%val(i) * d - end do + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do - call a%set_dev() + call a%set_dev() - call psb_erractionrestore(err_act) - return + call psb_erractionrestore(err_act) + return 9999 call psb_error_handler(err_act) - return + return -end subroutine psb_d_oacc_csr_scals + end subroutine psb_d_oacc_csr_scals +end submodule psb_d_oacc_csr_scals_impl diff --git a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 index f0394591..1dca2ba2 100644 --- a/openacc/impl/psb_d_oacc_csr_vect_mv.F90 +++ b/openacc/impl/psb_d_oacc_csr_vect_mv.F90 @@ -1,6 +1,7 @@ -subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) - use psb_base_mod - use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_vect_mv +submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_vect_mv_impl + use psb_base_mod +contains + module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) implicit none real(psb_dpk_), intent(in) :: alpha, beta @@ -16,9 +17,9 @@ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) n = a%get_ncols() if ((n /= size(x%v)) .or. (n /= size(y%v))) then - write(0,*) 'Size error ', m, n, size(x%v), size(y%v) - info = psb_err_invalid_mat_state_ - return + write(0,*) 'Size error ', m, n, size(x%v), size(y%v) + info = psb_err_invalid_mat_state_ + return end if if (a%is_host()) call a%sync() @@ -28,34 +29,35 @@ subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info) call y%set_dev() -contains + contains subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info) - implicit none - integer(psb_ipk_) :: m, n - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_) :: val(:), x(:), y(:) - integer(psb_ipk_) :: ja(:), irp(:) - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, j, ii, isz - real(psb_dpk_) :: tmp - integer(psb_ipk_), parameter :: vsz = 256 - - info = 0 - - !$acc parallel loop vector_length(vsz) private(isz) - do ii = 1, m, vsz - isz = min(vsz, m - ii + 1) - !$acc loop independent private(tmp) - do i = ii, ii + isz - 1 - tmp = 0.0_psb_dpk_ - !$acc loop seq - do j = irp(i), irp(i + 1) - 1 - tmp = tmp + val(j) * x(ja(j)) - end do - y(i) = alpha * tmp + beta * y(i) - end do + implicit none + integer(psb_ipk_) :: m, n + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_) :: val(:), x(:), y(:) + integer(psb_ipk_) :: ja(:), irp(:) + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: i, j, ii, isz + real(psb_dpk_) :: tmp + integer(psb_ipk_), parameter :: vsz = 256 + + info = 0 + + !$acc parallel loop vector_length(vsz) private(isz) + do ii = 1, m, vsz + isz = min(vsz, m - ii + 1) + !$acc loop independent private(tmp) + do i = ii, ii + isz - 1 + tmp = 0.0_psb_dpk_ + !$acc loop seq + do j = irp(i), irp(i + 1) - 1 + tmp = tmp + val(j) * x(ja(j)) + end do + y(i) = alpha * tmp + beta * y(i) end do + end do end subroutine inner_spmv -end subroutine psb_d_oacc_csr_vect_mv + end subroutine psb_d_oacc_csr_vect_mv +end submodule psb_d_oacc_csr_vect_mv_impl diff --git a/openacc/impl/psb_d_oacc_mlt_v.f90 b/openacc/impl/psb_d_oacc_mlt_v.f90 index bedd0247..ab242b57 100644 --- a/openacc/impl/psb_d_oacc_mlt_v.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v.f90 @@ -1,31 +1,34 @@ +submodule (psb_d_oacc_vect_mod) psb_d_oacc_mlt_v_impl + use psb_string_mod +contains -subroutine d_oacc_mlt_v(x, y, info) - use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v + module subroutine psb_d_oacc_mlt_v(x, y, info) - implicit none - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info + implicit none + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_vect_oacc), intent(inout) :: y + integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n + integer(psb_ipk_) :: i, n - info = 0 - n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) - class is (psb_d_vect_oacc) - if (y%is_host()) call y%sync() - if (xx%is_host()) call xx%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_dev() - class default - if (xx%is_dev()) call xx%sync() - if (y%is_dev()) call y%sync() - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - end select -end subroutine d_oacc_mlt_v + info = 0 + n = min(x%get_nrows(), y%get_nrows()) + select type(xx => x) + class is (psb_d_vect_oacc) + if (y%is_host()) call y%sync() + if (xx%is_host()) call xx%sync() + !$acc parallel loop + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (y%is_dev()) call y%sync() + do i = 1, n + y%v(i) = y%v(i) * xx%v(i) + end do + call y%set_host() + end select + end subroutine psb_d_oacc_mlt_v +end submodule psb_d_oacc_mlt_v_impl diff --git a/openacc/impl/psb_d_oacc_mlt_v_2.f90 b/openacc/impl/psb_d_oacc_mlt_v_2.f90 index 7e46495f..4ca2bdab 100644 --- a/openacc/impl/psb_d_oacc_mlt_v_2.f90 +++ b/openacc/impl/psb_d_oacc_mlt_v_2.f90 @@ -1,53 +1,55 @@ -subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2 +submodule (psb_d_oacc_vect_mod) d_oacc_mlt_v_2_impl use psb_string_mod - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - character(len=1), intent(in), optional :: conjgx, conjgy - integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ +contains + module subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) + implicit none + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x + class(psb_d_base_vect_type), intent(inout) :: y + class(psb_d_vect_oacc), intent(inout) :: z + integer(psb_ipk_), intent(out) :: info + character(len=1), intent(in), optional :: conjgx, conjgy + integer(psb_ipk_) :: i, n + logical :: conjgx_, conjgy_ - conjgx_ = .false. - conjgy_ = .false. - if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') - if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') + conjgx_ = .false. + conjgy_ = .false. + if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') + if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') - n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) + n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) - info = 0 - select type(xx => x) - class is (psb_d_vect_oacc) - select type (yy => y) + info = 0 + select type(xx => x) class is (psb_d_vect_oacc) - if (xx%is_host()) call xx%sync() - if (yy%is_host()) call yy%sync() - if ((beta /= dzero) .and. (z%is_host())) call z%sync() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) - end do - call z%set_dev() + select type (yy => y) + class is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (yy%is_host()) call yy%sync() + if ((beta /= dzero) .and. (z%is_host())) call z%sync() + !$acc parallel loop + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_dev() + class default + if (xx%is_dev()) call xx%sync() + if (yy%is_dev()) call yy%sync() + if ((beta /= dzero) .and. (z%is_dev())) call z%sync() + do i = 1, n + z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + end do + call z%set_host() + end select class default - if (xx%is_dev()) call xx%sync() - if (yy%is_dev()) call yy%sync() + if (x%is_dev()) call x%sync() + if (y%is_dev()) call y%sync() if ((beta /= dzero) .and. (z%is_dev())) call z%sync() do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) + z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) end do call z%set_host() end select - class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync() - do i = 1, n - z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) - end do - call z%set_host() - end select -end subroutine d_oacc_mlt_v_2 + end subroutine d_oacc_mlt_v_2 +end submodule d_oacc_mlt_v_2_impl diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index b8eb1be9..ca4199a8 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -1,359 +1,346 @@ module psb_d_oacc_csr_mat_mod - use iso_c_binding - use psb_d_mat_mod - use psb_d_oacc_vect_mod - !use oaccsparse_mod - - integer(psb_ipk_), parameter, private :: is_host = -1 - integer(psb_ipk_), parameter, private :: is_sync = 0 - integer(psb_ipk_), parameter, private :: is_dev = 1 - - type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat - integer(psb_ipk_) :: devstate = is_host - contains - procedure, nopass :: get_fmt => d_oacc_csr_get_fmt - procedure, pass(a) :: sizeof => d_oacc_csr_sizeof - procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv - procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv - procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm - procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv - procedure, pass(a) :: scals => psb_d_oacc_csr_scals - procedure, pass(a) :: scalv => psb_d_oacc_csr_scal - procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz - procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz - procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo - procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt - procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo - procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt - procedure, pass(a) :: free => d_oacc_csr_free - procedure, pass(a) :: mold => psb_d_oacc_csr_mold - procedure, pass(a) :: all => d_oacc_csr_all - procedure, pass(a) :: is_host => d_oacc_csr_is_host - procedure, pass(a) :: is_sync => d_oacc_csr_is_sync - procedure, pass(a) :: is_dev => d_oacc_csr_is_dev - procedure, pass(a) :: set_host => d_oacc_csr_set_host - procedure, pass(a) :: set_sync => d_oacc_csr_set_sync - procedure, pass(a) :: set_dev => d_oacc_csr_set_dev - procedure, pass(a) :: sync_space => d_oacc_csr_sync_space - procedure, pass(a) :: sync => d_oacc_csr_sync - end type psb_d_oacc_csr_sparse_mat - - interface - subroutine psb_d_oacc_csr_mold(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - class(psb_d_base_sparse_mat), intent(inout), allocatable :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_mold - end interface - - interface - subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_cp_from_fmt - end interface - - interface - subroutine psb_d_oacc_csr_mv_from_coo(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_mv_from_coo - end interface - - interface - subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_base_sparse_mat), intent(inout) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_mv_from_fmt - end interface - - interface - subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x, y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_vect_mv - end interface - - interface - subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x,y - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_inner_vect_sv - end interface - - interface - subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) - real(psb_dpk_), intent(inout) :: y(:,:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_csmm - end interface - - interface - subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - real(psb_dpk_), intent(in) :: alpha, beta, x(:) - real(psb_dpk_), intent(inout) :: y(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: trans - end subroutine psb_d_oacc_csr_csmv - end interface - - interface - subroutine psb_d_oacc_csr_scals(d, a, info) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_scals - end interface - - interface - subroutine psb_d_oacc_csr_scal(d,a,info,side) - import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - real(psb_dpk_), intent(in) :: d(:) - integer(psb_ipk_), intent(out) :: info - character, optional, intent(in) :: side - end subroutine psb_d_oacc_csr_scal - end interface - - interface - subroutine psb_d_oacc_csr_reallocate_nz(nz,a) - import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: nz - end subroutine psb_d_oacc_csr_reallocate_nz - end interface - - interface - subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz) - import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_), intent(in) :: m,n - integer(psb_ipk_), intent(in), optional :: nz - end subroutine psb_d_oacc_csr_allocate_mnnz - end interface - - interface - subroutine psb_d_oacc_csr_cp_from_coo(a,b,info) - import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - class(psb_d_coo_sparse_mat), intent(in) :: b - integer(psb_ipk_), intent(out) :: info - end subroutine psb_d_oacc_csr_cp_from_coo - end interface + use iso_c_binding + use psb_d_mat_mod + use psb_d_oacc_vect_mod + !use oaccsparse_mod + + integer(psb_ipk_), parameter, private :: is_host = -1 + integer(psb_ipk_), parameter, private :: is_sync = 0 + integer(psb_ipk_), parameter, private :: is_dev = 1 + + type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat + integer(psb_ipk_) :: devstate = is_host + contains + procedure, nopass :: get_fmt => d_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => d_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv + procedure, pass(a) :: scals => psb_d_oacc_csr_scals + procedure, pass(a) :: scalv => psb_d_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => d_oacc_csr_free + procedure, pass(a) :: mold => psb_d_oacc_csr_mold + procedure, pass(a) :: all => d_oacc_csr_all + procedure, pass(a) :: is_host => d_oacc_csr_is_host + procedure, pass(a) :: is_sync => d_oacc_csr_is_sync + procedure, pass(a) :: is_dev => d_oacc_csr_is_dev + procedure, pass(a) :: set_host => d_oacc_csr_set_host + procedure, pass(a) :: set_sync => d_oacc_csr_set_sync + procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: sync_space => d_oacc_csr_sync_space + procedure, pass(a) :: sync => d_oacc_csr_sync + end type psb_d_oacc_csr_sparse_mat + + interface + module subroutine psb_d_oacc_csr_mold(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mold + end interface + + interface + module subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_fmt + end interface + + interface + module subroutine psb_d_oacc_csr_mv_from_coo(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_coo + end interface + + interface + module subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_fmt + end interface + + interface + module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_vect_mv + end interface + + interface + module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_inner_vect_sv + end interface + + interface + module subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmm + end interface + + interface + module subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmv + end interface + + interface + module subroutine psb_d_oacc_csr_scals(d, a, info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_scals + end interface + + interface + module subroutine psb_d_oacc_csr_scal(d,a,info,side) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_csr_scal + end interface + + interface + module subroutine psb_d_oacc_csr_reallocate_nz(nz,a) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_csr_reallocate_nz + end interface + + interface + module subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_csr_allocate_mnnz + end interface + + interface + module subroutine psb_d_oacc_csr_cp_from_coo(a,b,info) + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_coo + end interface contains - - - subroutine d_oacc_csr_free(a) - use psb_base_mod - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - integer(psb_ipk_) :: info - - if (allocated(a%val)) then - !$acc exit data delete(a%val) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) - end if - - call a%psb_d_csr_sparse_mat%free() - - return - end subroutine d_oacc_csr_free - - - function d_oacc_csr_sizeof(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - integer(psb_epk_) :: res - if (a%is_dev()) call a%sync() - - res = 8 - res = res + psb_sizeof_dp * size(a%val) - res = res + psb_sizeof_ip * size(a%irp) - res = res + psb_sizeof_ip * size(a%ja) - - end function d_oacc_csr_sizeof - - - function d_oacc_csr_get_fmt() result(res) - implicit none - character(len=5) :: res - res = 'CSR_oacc' - end function d_oacc_csr_get_fmt - - subroutine d_oacc_csr_all(m, n, nz, a, info) - implicit none - integer(psb_ipk_), intent(in) :: m, n, nz - class(psb_d_oacc_csr_sparse_mat), intent(out) :: a - integer(psb_ipk_), intent(out) :: info - - info = 0 - if (allocated(a%val)) then - !$acc exit data delete(a%val) finalize - deallocate(a%val, stat=info) - end if - if (allocated(a%ja)) then - !$acc exit data delete(a%ja) finalize - deallocate(a%ja, stat=info) - end if - if (allocated(a%irp)) then - !$acc exit data delete(a%irp) finalize - deallocate(a%irp, stat=info) - end if - - call a%set_nrows(m) - call a%set_ncols(n) - - allocate(a%val(nz),stat=info) - allocate(a%ja(nz),stat=info) - allocate(a%irp(m+1),stat=info) - if (info == 0) call a%set_host() - if (info == 0) call a%sync_space() - end subroutine d_oacc_csr_all - - function d_oacc_csr_is_host(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_host) - end function d_oacc_csr_is_host - - function d_oacc_csr_is_sync(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_sync) - end function d_oacc_csr_is_sync - - function d_oacc_csr_is_dev(a) result(res) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(in) :: a - logical :: res - - res = (a%devstate == is_dev) - end function d_oacc_csr_is_dev - - subroutine d_oacc_csr_set_host(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - - a%devstate = is_host - end subroutine d_oacc_csr_set_host - - subroutine d_oacc_csr_set_sync(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - - a%devstate = is_sync - end subroutine d_oacc_csr_set_sync - - subroutine d_oacc_csr_set_dev(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - - a%devstate = is_dev - end subroutine d_oacc_csr_set_dev - - subroutine d_oacc_csr_sync_space(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a - if (allocated(a%val)) then - call d_oacc_create_dev(a%val) - end if - if (allocated(a%ja)) then - call i_oacc_create_dev(a%ja) - end if - if (allocated(a%irp)) then - call i_oacc_create_dev(a%irp) - end if - contains - subroutine d_oacc_create_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine d_oacc_create_dev - subroutine i_oacc_create_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc enter data copyin(v) - end subroutine i_oacc_create_dev - end subroutine d_oacc_csr_sync_space - - subroutine d_oacc_csr_sync(a) - implicit none - class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a - class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa - integer(psb_ipk_) :: info - - tmpa => a - if (a%is_dev()) then - call d_oacc_csr_to_host(a%val) - call i_oacc_csr_to_host(a%ja) - call i_oacc_csr_to_host(a%irp) - else if (a%is_host()) then - call d_oacc_csr_to_dev(a%val) - call i_oacc_csr_to_dev(a%ja) - call i_oacc_csr_to_dev(a%irp) - end if - call tmpa%set_sync() - end subroutine d_oacc_csr_sync - - subroutine d_oacc_csr_to_dev(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine d_oacc_csr_to_dev - - subroutine d_oacc_csr_to_host(v) - implicit none - real(psb_dpk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine d_oacc_csr_to_host - - subroutine i_oacc_csr_to_dev(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update device(v) - end subroutine i_oacc_csr_to_dev - - subroutine i_oacc_csr_to_host(v) - implicit none - integer(psb_ipk_), intent(in) :: v(:) - !$acc update self(v) - end subroutine i_oacc_csr_to_host - - + subroutine d_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_oacc_csr_free + + + + function d_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_oacc_csr_sizeof + + + function d_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function d_oacc_csr_get_fmt + + subroutine d_oacc_csr_all(m, n, nz, a, info) + implicit none + integer(psb_ipk_), intent(in) :: m, n, nz + class(psb_d_oacc_csr_sparse_mat), intent(out) :: a + integer(psb_ipk_), intent(out) :: info + + info = 0 + if (allocated(a%val)) then + !$acc exit data delete(a%val) finalize + deallocate(a%val, stat=info) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) finalize + deallocate(a%ja, stat=info) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) finalize + deallocate(a%irp, stat=info) + end if + + call a%set_nrows(m) + call a%set_ncols(n) + + allocate(a%val(nz),stat=info) + allocate(a%ja(nz),stat=info) + allocate(a%irp(m+1),stat=info) + if (info == 0) call a%set_host() + if (info == 0) call a%sync_space() + end subroutine d_oacc_csr_all + + function d_oacc_csr_is_host(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_host) + end function d_oacc_csr_is_host + + function d_oacc_csr_is_sync(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_sync) + end function d_oacc_csr_is_sync + + function d_oacc_csr_is_dev(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + logical :: res + + res = (a%devstate == is_dev) + end function d_oacc_csr_is_dev + + subroutine d_oacc_csr_set_host(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_host + end subroutine d_oacc_csr_set_host + + subroutine d_oacc_csr_set_sync(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_sync + end subroutine d_oacc_csr_set_sync + + subroutine d_oacc_csr_set_dev(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + + a%devstate = is_dev + end subroutine d_oacc_csr_set_dev + + subroutine d_oacc_csr_sync_space(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + if (allocated(a%val)) then + call d_oacc_create_dev(a%val) + end if + if (allocated(a%ja)) then + call i_oacc_create_dev(a%ja) + end if + if (allocated(a%irp)) then + call i_oacc_create_dev(a%irp) + end if + contains + subroutine d_oacc_create_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine d_oacc_create_dev + subroutine i_oacc_create_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc enter data copyin(v) + end subroutine i_oacc_create_dev + end subroutine d_oacc_csr_sync_space + + subroutine d_oacc_csr_sync(a) + implicit none + class(psb_d_oacc_csr_sparse_mat), target, intent(in) :: a + class(psb_d_oacc_csr_sparse_mat), pointer :: tmpa + integer(psb_ipk_) :: info + + tmpa => a + if (a%is_dev()) then + call d_oacc_csr_to_host(a%val) + call i_oacc_csr_to_host(a%ja) + call i_oacc_csr_to_host(a%irp) + else if (a%is_host()) then + call d_oacc_csr_to_dev(a%val) + call i_oacc_csr_to_dev(a%ja) + call i_oacc_csr_to_dev(a%irp) + end if + call tmpa%set_sync() + end subroutine d_oacc_csr_sync + + subroutine d_oacc_csr_to_dev(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine d_oacc_csr_to_dev + + subroutine d_oacc_csr_to_host(v) + implicit none + real(psb_dpk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine d_oacc_csr_to_host + + subroutine i_oacc_csr_to_dev(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update device(v) + end subroutine i_oacc_csr_to_dev + + subroutine i_oacc_csr_to_host(v) + implicit none + integer(psb_ipk_), intent(in) :: v(:) + !$acc update self(v) + end subroutine i_oacc_csr_to_host + + end module psb_d_oacc_csr_mat_mod diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index 3139f9b8..3385f1ec 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -49,8 +49,8 @@ module psb_d_oacc_vect_mod procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz procedure, pass(y) :: mlt_a => d_oacc_mlt_a procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(y) :: mlt_v => d_oacc_mlt_v - procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 + procedure, pass(y) :: mlt_v => psb_d_oacc_mlt_v + procedure, pass(z) :: mlt_v_2 => psb_d_oacc_mlt_v_2 procedure, pass(x) :: scal => d_oacc_scal procedure, pass(x) :: nrm2 => d_oacc_nrm2 procedure, pass(x) :: amax => d_oacc_amax @@ -63,19 +63,17 @@ module psb_d_oacc_vect_mod real(psb_dpk_), allocatable :: v1(:),v2(:),p(:) interface - subroutine d_oacc_mlt_v(x, y, info) - import + module subroutine psb_d_oacc_mlt_v(x, y, info) implicit none class(psb_d_base_vect_type), intent(inout) :: x class(psb_d_vect_oacc), intent(inout) :: y integer(psb_ipk_), intent(out) :: info - end subroutine d_oacc_mlt_v + end subroutine psb_d_oacc_mlt_v end interface interface - subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - import + module subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) implicit none real(psb_dpk_), intent(in) :: alpha, beta class(psb_d_base_vect_type), intent(inout) :: x @@ -83,7 +81,7 @@ module psb_d_oacc_vect_mod class(psb_d_vect_oacc), intent(inout) :: z integer(psb_ipk_), intent(out) :: info character(len=1), intent(in), optional :: conjgx, conjgy - end subroutine d_oacc_mlt_v_2 + end subroutine psb_d_oacc_mlt_v_2 end interface contains