From 2b5f09ddf9f746267c2e88530bbb708b89a532dd Mon Sep 17 00:00:00 2001 From: tloloum Date: Fri, 12 Jul 2024 09:26:06 +0200 Subject: [PATCH] all methods implementations for psb_d_oacc_csr_sparse_mat --- openacc/Makefile | 10 +- openacc/impl/Makefile | 8 +- openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 | 34 +++ openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 | 25 +++ openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 | 23 +++ openacc/impl/psb_d_oacc_csr_csmm.F90 | 86 ++++++++ openacc/impl/psb_d_oacc_csr_csmv.F90 | 81 ++++++++ openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 | 84 ++++++++ openacc/impl/psb_d_oacc_csr_mold.F90 | 33 +++ openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 | 24 +++ openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 | 23 +++ openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 | 27 +++ openacc/impl/psb_d_oacc_csr_scal.F90 | 52 +++++ openacc/impl/psb_d_oacc_csr_scals.F90 | 32 +++ openacc/psb_d_oacc_csr_mat_mod.F90 | 194 +++++++++++++++++- 15 files changed, 719 insertions(+), 17 deletions(-) create mode 100644 openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_csmm.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_csmv.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_mold.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_scal.F90 create mode 100644 openacc/impl/psb_d_oacc_csr_scals.F90 diff --git a/openacc/Makefile b/openacc/Makefile index aa6ea23d..d1311fe2 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -1,34 +1,32 @@ include ../Make.inc -# Compilers and flags + #CC=mpicc #FC=mpif90 #FCOPT=-O0 -march=native #OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" -# Directories LIBDIR=../lib INCDIR=../include MODDIR=../modules IMPLDIR=./impl -# Include and library paths INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). CINCLUDES= #LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis -# Source files + FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ psb_oacc_env_mod.o -# Library name + LIBNAME=libpsb_openacc.a OBJS=$(COBJS) $(FOBJS) -# Rules + lib: objs ilib ar cur $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) diff --git a/openacc/impl/Makefile b/openacc/impl/Makefile index 7bab6654..ff0e8251 100755 --- a/openacc/impl/Makefile +++ b/openacc/impl/Makefile @@ -9,7 +9,13 @@ MODDIR=../../modules FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG).. LIBNAME=libpsb_openacc.a -OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o +OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \ + psb_d_oacc_csr_csmm.o psb_d_oacc_csr_csmv.o psb_d_oacc_csr_scals.o \ + psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \ + psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \ + psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo \ + psb_d_oacc_csr_mv_from_fmt psb_d_oacc_csr_mold.o \ + psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o objs: $(OBJS) diff --git a/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 new file mode 100644 index 00000000..a0b8abe0 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_allocate_mnnz.F90 @@ -0,0 +1,34 @@ +subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_allocate_mnnz + implicit none + integer(psb_ipk_), intent(in) :: m, n + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in), optional :: nz + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act, nz_ + character(len=20) :: name='allocate_mnz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%allocate(m, n, nz) + + if (.not.allocated(a%val)) then + allocate(a%val(nz)) + allocate(a%ja(nz)) + allocate(a%irp(m+1)) + end if + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_oacc_csr_allocate_mnnz + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 new file mode 100644 index 00000000..c84c5876 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_cp_from_coo.F90 @@ -0,0 +1,25 @@ +subroutine psb_d_oacc_csr_cp_from_coo(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_coo + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%cp_from_coo(b, info) + if (info /= 0) goto 9999 + + call a%set_dev() + if (info /= 0) goto 9999 + + return + + 9999 continue + info = psb_err_alloc_dealloc_ + return + + end subroutine psb_d_oacc_csr_cp_from_coo + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 new file mode 100644 index 00000000..2eae41d7 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_cp_from_fmt.F90 @@ -0,0 +1,23 @@ +subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_cp_from_fmt + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%cp_from_coo(b, info) + class default + call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + +end subroutine psb_d_oacc_csr_cp_from_fmt + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_csmm.F90 b/openacc/impl/psb_d_oacc_csr_csmm.F90 new file mode 100644 index 00000000..e7224764 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_csmm.F90 @@ -0,0 +1,86 @@ +subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmm + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n,k, nxy + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_csmm' + logical, parameter :: debug = .false. + + info = psb_success_ + call psb_erractionsave(err_act) + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + nxy = min(size(x,2), size(y,2)) + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, m + y(i,j) = beta * y(i,j) + end do + end do + + !$acc parallel loop collapse(2) present(a, x, y) + do j = 1, nxy + do i = 1, n + do k = a%irp(i), a%irp(i+1) - 1 + y(a%ja(k), j) = y(a%ja(k), j) + alpha * a%val(k) * x(i, j) + end do + end do + end do + endif + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_csmm + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_csmv.F90 b/openacc/impl/psb_d_oacc_csr_csmv.F90 new file mode 100644 index 00000000..c3291acf --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_csmv.F90 @@ -0,0 +1,81 @@ +subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_csmv + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + real(psb_dpk_), intent(in) :: x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + character :: trans_ + integer(psb_ipk_) :: i, j, m, n + logical :: tra + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_csmv' + logical, parameter :: debug = .false. + + call psb_erractionsave(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra) then + m = a%get_ncols() + n = a%get_nrows() + else + n = a%get_ncols() + m = a%get_nrows() + end if + + if (size(x,1) < n) then + info = 36 + call psb_errpush(info, name, i_err = (/3 * ione, n, izero, izero, izero/)) + goto 9999 + end if + + if (size(y,1) < m) then + info = 36 + call psb_errpush(info, name, i_err = (/5 * ione, m, izero, izero, izero/)) + goto 9999 + end if + + if (tra) then + call a%psb_d_csr_sparse_mat%spmm(alpha, x, beta, y, info, trans) + else + !$acc parallel loop present(a, x, y) + do i = 1, m + y(i) = beta * y(i) + end do + + !$acc parallel loop present(a, x, y) + do i = 1, n + do j = a%irp(i), a%irp(i+1) - 1 + y(a%ja(j)) = y(a%ja(j)) + alpha * a%val(j) * x(i) + end do + end do + endif + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_csmv + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 new file mode 100644 index 00000000..c0461c82 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_inner_vect_sv.F90 @@ -0,0 +1,84 @@ +subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + use psb_base_mod + use elldev_mod + use psb_vectordev_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_inner_vect_sv + use psb_d_oacc_vect_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x, y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + + real(psb_dpk_), allocatable :: rx(:), ry(:) + logical :: tra + character :: trans_ + integer(psb_ipk_) :: err_act + character(len=20) :: name = 'd_oacc_csr_inner_vect_sv' + logical, parameter :: debug = .false. + integer(psb_ipk_) :: i + + call psb_get_erraction(err_act) + info = psb_success_ + + if (present(trans)) then + trans_ = trans + else + trans_ = 'N' + end if + + if (.not.a%is_asb()) then + info = psb_err_invalid_mat_state_ + call psb_errpush(info, name) + goto 9999 + endif + + tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C') + + if (tra .or. (beta /= dzero)) then + call x%sync() + call y%sync() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans) + call y%set_host() + else + select type (xx => x) + type is (psb_d_vect_oacc) + select type(yy => y) + type is (psb_d_vect_oacc) + if (xx%is_host()) call xx%sync() + if (beta /= dzero) then + if (yy%is_host()) call yy%sync() + end if + !$acc parallel loop present(a, xx, yy) + do i = 1, size(a%val) + yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i) + end do + call yy%set_dev() + class default + rx = xx%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + class default + rx = x%get_vect() + ry = y%get_vect() + call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info) + call y%bld(ry) + end select + endif + + if (info /= psb_success_) then + info = psb_err_from_subroutine_ + call psb_errpush(info, name, a_err = 'csrg_vect_sv') + goto 9999 + endif + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + end subroutine psb_d_oacc_csr_inner_vect_sv + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_mold.F90 b/openacc/impl/psb_d_oacc_csr_mold.F90 new file mode 100644 index 00000000..08598cb6 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mold.F90 @@ -0,0 +1,33 @@ +subroutine psb_d_oacc_csr_mold(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mold + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='csr_mold' + logical, parameter :: debug=.false. + + call psb_get_erraction(err_act) + + info = 0 + if (allocated(b)) then + call b%free() + deallocate(b, stat=info) + end if + if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info) + + if (info /= psb_success_) then + info = psb_err_alloc_dealloc_ + call psb_errpush(info, name) + goto 9999 + end if + return + + 9999 call psb_error_handler(err_act) + + return + + end subroutine psb_d_oacc_csr_mold + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 new file mode 100644 index 00000000..5f4a6b41 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mv_from_coo.F90 @@ -0,0 +1,24 @@ +subroutine psb_d_oacc_csr_mv_from_coo(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_coo + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + call a%psb_d_csr_sparse_mat%mv_from_coo(b, info) + if (info /= 0) goto 9999 + + !$acc update device(a%val, a%ja, a%irp) + + return + + 9999 continue + info = psb_err_alloc_dealloc_ + return + +end subroutine psb_d_oacc_csr_mv_from_coo + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 new file mode 100644 index 00000000..16a4636b --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_mv_from_fmt.F90 @@ -0,0 +1,23 @@ +subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_mv_from_fmt + implicit none + + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + + info = psb_success_ + + select type(b) + type is (psb_d_coo_sparse_mat) + call a%mv_from_coo(b, info) + class default + call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info) + if (info /= 0) return + + !$acc update device(a%val, a%ja, a%irp) + end select + +end subroutine psb_d_oacc_csr_mv_from_fmt + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 new file mode 100644 index 00000000..7a6723d0 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_reallocate_nz.F90 @@ -0,0 +1,27 @@ +subroutine psb_d_oacc_csr_reallocate_nz(nz, a) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_reallocate_nz + implicit none + integer(psb_ipk_), intent(in) :: nz + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + integer(psb_ipk_) :: err_act + character(len=20) :: name='d_oacc_csr_reallocate_nz' + logical, parameter :: debug=.false. + + call psb_erractionsave(err_act) + info = psb_success_ + + call a%psb_d_csr_sparse_mat%reallocate(nz) + + call a%set_dev() + if (info /= 0) goto 9999 + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_reallocate_nz + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_scal.F90 b/openacc/impl/psb_d_oacc_csr_scal.F90 new file mode 100644 index 00000000..b2bf12e8 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_scal.F90 @@ -0,0 +1,52 @@ +subroutine psb_d_oacc_csr_scal(d, a, info, side) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scal + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, intent(in), optional :: side + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i, j + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + if (present(side)) then + if (side == 'L') then + !$acc parallel loop present(a, d) + do i = 1, a%get_nrows() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(i) + end do + end do + else if (side == 'R') then + !$acc parallel loop present(a, d) + do i = 1, a%get_ncols() + do j = a%irp(i), a%irp(i+1) - 1 + a%val(j) = a%val(j) * d(a%ja(j)) + end do + end do + end if + else + !$acc parallel loop present(a, d) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d(i) + end do + end if + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + + 9999 call psb_error_handler(err_act) + return + + end subroutine psb_d_oacc_csr_scal + \ No newline at end of file diff --git a/openacc/impl/psb_d_oacc_csr_scals.F90 b/openacc/impl/psb_d_oacc_csr_scals.F90 new file mode 100644 index 00000000..8c77f647 --- /dev/null +++ b/openacc/impl/psb_d_oacc_csr_scals.F90 @@ -0,0 +1,32 @@ +subroutine psb_d_oacc_csr_scals(d, a, info) + use psb_base_mod + use psb_d_oacc_csr_mat_mod, psb_protect_name => psb_d_oacc_csr_scals + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + + integer(psb_ipk_) :: err_act + character(len=20) :: name='scal' + logical, parameter :: debug=.false. + integer(psb_ipk_) :: i + + info = psb_success_ + call psb_erractionsave(err_act) + + if (a%is_host()) call a%sync() + + !$acc parallel loop present(a) + do i = 1, size(a%val) + a%val(i) = a%val(i) * d + end do + + call a%set_dev() + + call psb_erractionrestore(err_act) + return + +9999 call psb_error_handler(err_act) + return + +end subroutine psb_d_oacc_csr_scals diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index d9c8a2a6..b8eb1be9 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -12,18 +12,69 @@ module psb_d_oacc_csr_mat_mod type, extends(psb_d_csr_sparse_mat) :: psb_d_oacc_csr_sparse_mat integer(psb_ipk_) :: devstate = is_host contains - procedure, pass(a) :: all => d_oacc_csr_all - procedure, pass(a) :: is_host => d_oacc_csr_is_host - procedure, pass(a) :: is_sync => d_oacc_csr_is_sync - procedure, pass(a) :: is_dev => d_oacc_csr_is_dev - procedure, pass(a) :: set_host => d_oacc_csr_set_host - procedure, pass(a) :: set_sync => d_oacc_csr_set_sync - procedure, pass(a) :: set_dev => d_oacc_csr_set_dev - procedure, pass(a) :: sync_space => d_oacc_csr_sync_space - procedure, pass(a) :: sync => d_oacc_csr_sync - procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, nopass :: get_fmt => d_oacc_csr_get_fmt + procedure, pass(a) :: sizeof => d_oacc_csr_sizeof + procedure, pass(a) :: vect_mv => psb_d_oacc_csr_vect_mv + procedure, pass(a) :: in_vect_sv => psb_d_oacc_csr_inner_vect_sv + procedure, pass(a) :: csmm => psb_d_oacc_csr_csmm + procedure, pass(a) :: csmv => psb_d_oacc_csr_csmv + procedure, pass(a) :: scals => psb_d_oacc_csr_scals + procedure, pass(a) :: scalv => psb_d_oacc_csr_scal + procedure, pass(a) :: reallocate_nz => psb_d_oacc_csr_reallocate_nz + procedure, pass(a) :: allocate_mnnz => psb_d_oacc_csr_allocate_mnnz + procedure, pass(a) :: cp_from_coo => psb_d_oacc_csr_cp_from_coo + procedure, pass(a) :: cp_from_fmt => psb_d_oacc_csr_cp_from_fmt + procedure, pass(a) :: mv_from_coo => psb_d_oacc_csr_mv_from_coo + procedure, pass(a) :: mv_from_fmt => psb_d_oacc_csr_mv_from_fmt + procedure, pass(a) :: free => d_oacc_csr_free + procedure, pass(a) :: mold => psb_d_oacc_csr_mold + procedure, pass(a) :: all => d_oacc_csr_all + procedure, pass(a) :: is_host => d_oacc_csr_is_host + procedure, pass(a) :: is_sync => d_oacc_csr_is_sync + procedure, pass(a) :: is_dev => d_oacc_csr_is_dev + procedure, pass(a) :: set_host => d_oacc_csr_set_host + procedure, pass(a) :: set_sync => d_oacc_csr_set_sync + procedure, pass(a) :: set_dev => d_oacc_csr_set_dev + procedure, pass(a) :: sync_space => d_oacc_csr_sync_space + procedure, pass(a) :: sync => d_oacc_csr_sync end type psb_d_oacc_csr_sparse_mat + interface + subroutine psb_d_oacc_csr_mold(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + class(psb_d_base_sparse_mat), intent(inout), allocatable :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mold + end interface + + interface + subroutine psb_d_oacc_csr_cp_from_fmt(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_fmt + end interface + + interface + subroutine psb_d_oacc_csr_mv_from_coo(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_coo + end interface + + interface + subroutine psb_d_oacc_csr_mv_from_fmt(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_base_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_base_sparse_mat), intent(inout) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_mv_from_fmt + end interface + interface subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans) import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ @@ -35,7 +86,130 @@ module psb_d_oacc_csr_mat_mod end subroutine psb_d_oacc_csr_vect_mv end interface + interface + subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta + class(psb_d_base_vect_type), intent(inout) :: x,y + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_inner_vect_sv + end interface + + interface + subroutine psb_d_oacc_csr_csmm(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:,:) + real(psb_dpk_), intent(inout) :: y(:,:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmm + end interface + + interface + subroutine psb_d_oacc_csr_csmv(alpha, a, x, beta, y, info, trans) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_d_base_vect_type, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + real(psb_dpk_), intent(in) :: alpha, beta, x(:) + real(psb_dpk_), intent(inout) :: y(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: trans + end subroutine psb_d_oacc_csr_csmv + end interface + + interface + subroutine psb_d_oacc_csr_scals(d, a, info) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_scals + end interface + + interface + subroutine psb_d_oacc_csr_scal(d,a,info,side) + import :: psb_d_oacc_csr_sparse_mat, psb_dpk_, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + real(psb_dpk_), intent(in) :: d(:) + integer(psb_ipk_), intent(out) :: info + character, optional, intent(in) :: side + end subroutine psb_d_oacc_csr_scal + end interface + + interface + subroutine psb_d_oacc_csr_reallocate_nz(nz,a) + import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: nz + end subroutine psb_d_oacc_csr_reallocate_nz + end interface + + interface + subroutine psb_d_oacc_csr_allocate_mnnz(m,n,a,nz) + import :: psb_d_oacc_csr_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_), intent(in) :: m,n + integer(psb_ipk_), intent(in), optional :: nz + end subroutine psb_d_oacc_csr_allocate_mnnz + end interface + + interface + subroutine psb_d_oacc_csr_cp_from_coo(a,b,info) + import :: psb_d_oacc_csr_sparse_mat, psb_d_coo_sparse_mat, psb_ipk_ + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + class(psb_d_coo_sparse_mat), intent(in) :: b + integer(psb_ipk_), intent(out) :: info + end subroutine psb_d_oacc_csr_cp_from_coo + end interface + contains + + + subroutine d_oacc_csr_free(a) + use psb_base_mod + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a + integer(psb_ipk_) :: info + + if (allocated(a%val)) then + !$acc exit data delete(a%val) + end if + if (allocated(a%ja)) then + !$acc exit data delete(a%ja) + end if + if (allocated(a%irp)) then + !$acc exit data delete(a%irp) + end if + + call a%psb_d_csr_sparse_mat%free() + + return + end subroutine d_oacc_csr_free + + + + function d_oacc_csr_sizeof(a) result(res) + implicit none + class(psb_d_oacc_csr_sparse_mat), intent(in) :: a + integer(psb_epk_) :: res + + if (a%is_dev()) call a%sync() + + res = 8 + res = res + psb_sizeof_dp * size(a%val) + res = res + psb_sizeof_ip * size(a%irp) + res = res + psb_sizeof_ip * size(a%ja) + + end function d_oacc_csr_sizeof + + + function d_oacc_csr_get_fmt() result(res) + implicit none + character(len=5) :: res + res = 'CSR_oacc' + end function d_oacc_csr_get_fmt subroutine d_oacc_csr_all(m, n, nz, a, info) implicit none