From 93c9df0277619a4a3d8d56d4acfd99cb717a79a4 Mon Sep 17 00:00:00 2001 From: sfilippone Date: Tue, 9 Jul 2024 10:03:57 +0200 Subject: [PATCH] Adjust Makefiles & source --- openacc/Makefile | 61 +- openacc/psb_d_oacc_csr_mat_mod.F90 | 2 +- openacc/psb_d_oacc_vect_mod.F90 | 1702 ++++++++++++++-------------- 3 files changed, 888 insertions(+), 877 deletions(-) diff --git a/openacc/Makefile b/openacc/Makefile index 3590249c..3327610f 100644 --- a/openacc/Makefile +++ b/openacc/Makefile @@ -1,11 +1,10 @@ -.SUFFIXES: -.SUFFIXES: .F90 .f90 .o .s .c +include ../Make.inc # Compilers and flags -CC=mpicc -FC=mpif90 -FCOPT=-O0 -march=native -OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" +#CC=mpicc +#FC=mpif90 +#FCOPT=-O0 -march=native +#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70" # Directories LIBDIR=../lib @@ -14,35 +13,49 @@ MODDIR=../modules IMPLDIR=./impl # Adding the impl directory # Include and library paths -INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR) -LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis +INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) +FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG). +CINCLUDES= +#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis # Source files FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ - psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \ - impl/psb_d_oacc_csr_vect_mv.o + psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o # Library name LIBNAME=libpsb_openacc.a -# Rules -all: $(LIBNAME) +OBJS=$(COBJS) $(FOBJS) -$(LIBNAME): $(FOBJS) - ar cr $(LIBNAME) $(FOBJS) +# Rules +lib: objs ilib + ar cur $(LIBNAME) $(OBJS) /bin/cp -p $(LIBNAME) $(LIBDIR) -clean: - /bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o +objs: $(OBJS) iobjs + /bin/cp -p *$(.mod) $(MODDIR) -.f90.o: - $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@ +iobjs: $(OBJS) + $(MAKE) -C impl objs -.c.o: - $(CC) -c $< -o $@ +ilib: $(OBJS) + $(MAKE) -C impl lib -.F90.o: - $(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@ +psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \ + psb_d_oacc_csr_mat_mod.o +clean: cclean iclean + /bin/rm -f $(FOBJS) *$(.mod) *.a +veryclean: clean +cclean: + /bin/rm -f $(COBJS) +iclean: + $(MAKE) -C impl clean -.F90.s: - $(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@ +.c.o: + $(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@ +.f90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@ +.F90.o: + $(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@ +.cpp.o: + $(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@ diff --git a/openacc/psb_d_oacc_csr_mat_mod.F90 b/openacc/psb_d_oacc_csr_mat_mod.F90 index c6b7e3d0..d9c8a2a6 100644 --- a/openacc/psb_d_oacc_csr_mat_mod.F90 +++ b/openacc/psb_d_oacc_csr_mat_mod.F90 @@ -3,7 +3,7 @@ module psb_d_oacc_csr_mat_mod use iso_c_binding use psb_d_mat_mod use psb_d_oacc_vect_mod - use oaccsparse_mod + !use oaccsparse_mod integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 diff --git a/openacc/psb_d_oacc_vect_mod.F90 b/openacc/psb_d_oacc_vect_mod.F90 index ac5428f3..eda804ce 100644 --- a/openacc/psb_d_oacc_vect_mod.F90 +++ b/openacc/psb_d_oacc_vect_mod.F90 @@ -3,9 +3,9 @@ module psb_d_oacc_vect_mod use psb_const_mod use psb_error_mod use psb_d_vect_mod - use psb_i_oacc_vect_mod use psb_i_vect_mod - + use psb_i_oacc_vect_mod + integer(psb_ipk_), parameter, private :: is_host = -1 integer(psb_ipk_), parameter, private :: is_sync = 0 integer(psb_ipk_), parameter, private :: is_dev = 1 @@ -14,49 +14,49 @@ module psb_d_oacc_vect_mod integer :: state = is_host contains - procedure, pass(x) :: get_nrows => d_oacc_get_nrows - procedure, nopass :: get_fmt => d_oacc_get_fmt - - procedure, pass(x) :: all => d_oacc_vect_all - procedure, pass(x) :: zero => d_oacc_zero - procedure, pass(x) :: asb_m => d_oacc_asb_m +!!$ procedure, pass(x) :: get_nrows => d_oacc_get_nrows +!!$ procedure, nopass :: get_fmt => d_oacc_get_fmt +!!$ +!!$ procedure, pass(x) :: all => d_oacc_vect_all +!!$ procedure, pass(x) :: zero => d_oacc_zero +!!$ procedure, pass(x) :: asb_m => d_oacc_asb_m procedure, pass(x) :: sync => d_oacc_sync procedure, pass(x) :: sync_space => d_oacc_sync_space - procedure, pass(x) :: bld_x => d_oacc_bld_x - procedure, pass(x) :: bld_mn => d_oacc_bld_mn - procedure, pass(x) :: free => d_oacc_vect_free - procedure, pass(x) :: ins_a => d_oacc_ins_a - procedure, pass(x) :: ins_v => d_oacc_ins_v +!!$ procedure, pass(x) :: bld_x => d_oacc_bld_x +!!$ procedure, pass(x) :: bld_mn => d_oacc_bld_mn +!!$ procedure, pass(x) :: free => d_oacc_vect_free +!!$ procedure, pass(x) :: ins_a => d_oacc_ins_a +!!$ procedure, pass(x) :: ins_v => d_oacc_ins_v procedure, pass(x) :: is_host => d_oacc_is_host procedure, pass(x) :: is_dev => d_oacc_is_dev procedure, pass(x) :: is_sync => d_oacc_is_sync procedure, pass(x) :: set_host => d_oacc_set_host procedure, pass(x) :: set_dev => d_oacc_set_dev procedure, pass(x) :: set_sync => d_oacc_set_sync - procedure, pass(x) :: set_scal => d_oacc_set_scal - - procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x - procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf - procedure, pass(y) :: sctb => d_oacc_sctb - procedure, pass(y) :: sctb_x => d_oacc_sctb_x - procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf - - procedure, pass(x) :: get_size => oacc_get_size - procedure, pass(x) :: dot_v => d_oacc_vect_dot - procedure, pass(x) :: dot_a => d_oacc_dot_a - procedure, pass(y) :: axpby_v => d_oacc_axpby_v - procedure, pass(y) :: axpby_a => d_oacc_axpby_a - procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz - procedure, pass(y) :: mlt_v => d_oacc_mlt_v - procedure, pass(y) :: mlt_a => d_oacc_mlt_a - procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 - procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 - procedure, pass(x) :: scal => d_oacc_scal - procedure, pass(x) :: nrm2 => d_oacc_nrm2 - procedure, pass(x) :: amax => d_oacc_amax - procedure, pass(x) :: asum => d_oacc_asum +!!$ procedure, pass(x) :: set_scal => d_oacc_set_scal +!!$ +!!$ procedure, pass(x) :: gthzv_x => d_oacc_gthzv_x +!!$ procedure, pass(x) :: gthzbuf_x => d_oacc_gthzbuf +!!$ procedure, pass(y) :: sctb => d_oacc_sctb +!!$ procedure, pass(y) :: sctb_x => d_oacc_sctb_x +!!$ procedure, pass(y) :: sctb_buf => d_oacc_sctb_buf +!!$ +!!$ procedure, pass(x) :: get_size => oacc_get_size +!!$ procedure, pass(x) :: dot_v => d_oacc_vect_dot +!!$ procedure, pass(x) :: dot_a => d_oacc_dot_a +!!$ procedure, pass(y) :: axpby_v => d_oacc_axpby_v +!!$ procedure, pass(y) :: axpby_a => d_oacc_axpby_a +!!$ procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz +!!$ procedure, pass(y) :: mlt_v => d_oacc_mlt_v +!!$ procedure, pass(y) :: mlt_a => d_oacc_mlt_a +!!$ procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2 +!!$ procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2 +!!$ procedure, pass(x) :: scal => d_oacc_scal +!!$ procedure, pass(x) :: nrm2 => d_oacc_nrm2 +!!$ procedure, pass(x) :: amax => d_oacc_amax +!!$ procedure, pass(x) :: asum => d_oacc_asum procedure, pass(x) :: absval1 => d_oacc_absval1 - procedure, pass(x) :: absval2 => d_oacc_absval2 +!!$ procedure, pass(x) :: absval2 => d_oacc_absval2 end type psb_d_vect_oacc @@ -78,729 +78,727 @@ contains call x%set_dev() end subroutine d_oacc_absval1 - subroutine d_oacc_absval2(x, y) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_) :: n - integer(psb_ipk_) :: i - - n = min(size(x%v), size(y%v)) - select type (yy => y) - class is (psb_d_vect_oacc) - if (x%is_host()) call x%sync() - if (yy%is_host()) call yy%sync() - !$acc parallel loop - do i = 1, n - yy%v(i) = abs(x%v(i)) - end do - class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() - call x%psb_d_base_vect_type%absval(y) - end select - end subroutine d_oacc_absval2 - - - - subroutine d_oacc_scal(alpha, x) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - real(psb_dpk_), intent(in) :: alpha - integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, size(x%v) - x%v(i) = alpha * x%v(i) - end do - call x%set_dev() - end subroutine d_oacc_scal - - function d_oacc_nrm2(n, x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - integer(psb_ipk_) :: info - real(psb_dpk_) :: sum - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + x%v(i) * x%v(i) - end do - res = sqrt(sum) - end function d_oacc_nrm2 - - function d_oacc_amax(n, x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - integer(psb_ipk_) :: info - real(psb_dpk_) :: max_val - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - max_val = -huge(0.0) - !$acc parallel loop reduction(max:max_val) - do i = 1, n - if (x%v(i) > max_val) max_val = x%v(i) - end do - res = max_val - end function d_oacc_amax - - function d_oacc_asum(n, x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - integer(psb_ipk_) :: info - real(psb_dpk_) :: sum - integer(psb_ipk_) :: i - - if (x%is_host()) call x%sync_space() - sum = 0.0 - !$acc parallel loop reduction(+:sum) - do i = 1, n - sum = sum + abs(x%v(i)) - end do - res = sum - end function d_oacc_asum - - - subroutine d_oacc_mlt_v(x, y, info) - use psi_serial_mod - implicit none - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i, n - - info = 0 - n = min(x%get_nrows(), y%get_nrows()) - select type(xx => x) - type is (psb_d_base_vect_type) - if (y%is_dev()) call y%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - class default - if (xx%is_dev()) call xx%sync() - if (y%is_dev()) call y%sync() - !$acc parallel loop - do i = 1, n - y%v(i) = y%v(i) * xx%v(i) - end do - call y%set_host() - end select - end subroutine d_oacc_mlt_v - - subroutine d_oacc_mlt_a(x, y, info) - use psi_serial_mod - implicit none - real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_oacc), intent(inout) :: y - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n - - info = 0 - if (y%is_dev()) call y%sync_space() - !$acc parallel loop - do i = 1, size(x) - y%v(i) = y%v(i) * x(i) - end do - call y%set_host() - end subroutine d_oacc_mlt_a - - subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) - use psi_serial_mod - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - real(psb_dpk_), intent(in) :: x(:) - real(psb_dpk_), intent(in) :: y(:) - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i, n - - info = 0 - if (z%is_dev()) call z%sync_space() - !$acc parallel loop - do i = 1, size(x) - z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) - end do - call z%set_host() - end subroutine d_oacc_mlt_a_2 - - subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) - use psi_serial_mod - use psb_string_mod - implicit none - real(psb_dpk_), intent(in) :: alpha, beta - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - integer(psb_ipk_), intent(out) :: info - character(len=1), intent(in), optional :: conjgx, conjgy - integer(psb_ipk_) :: i, n - logical :: conjgx_, conjgy_ - - conjgx_ = .false. - conjgy_ = .false. - if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') - if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') - - n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) - - info = 0 - select type(xx => x) - class is (psb_d_vect_oacc) - select type (yy => y) - class is (psb_d_vect_oacc) - if (xx%is_host()) call xx%sync_space() - if (yy%is_host()) call yy%sync_space() - if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) - end do - call z%set_dev() - class default - if (xx%is_dev()) call xx%sync_space() - if (yy%is_dev()) call yy%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) - end do - call z%set_host() - end select - class default - if (x%is_dev()) call x%sync() - if (y%is_dev()) call y%sync() - if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() - !$acc parallel loop - do i = 1, n - z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) - end do - call z%set_host() - end select - end subroutine d_oacc_mlt_v_2 - - - subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) - use psi_serial_mod - implicit none - integer(psb_ipk_), intent(in) :: m - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_vect_oacc), intent(inout) :: y - real(psb_dpk_), intent(in) :: alpha, beta - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nx, ny, i - - info = psb_success_ - - select type(xx => x) - type is (psb_d_vect_oacc) - if ((beta /= dzero) .and. y%is_host()) call y%sync_space() - if (xx%is_host()) call xx%sync_space() - nx = size(xx%v) - ny = size(y%v) - if ((nx < m) .or. (ny < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * xx%v(i) + beta * y%v(i) - end do - end if - call y%set_dev() - class default - if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() - call y%axpby(m, alpha, x%v, beta, info) - end select - end subroutine d_oacc_axpby_v - - subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) - use psi_serial_mod - implicit none - integer(psb_ipk_), intent(in) :: m - real(psb_dpk_), intent(in) :: x(:) - class(psb_d_vect_oacc), intent(inout) :: y - real(psb_dpk_), intent(in) :: alpha, beta - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: i - - if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() - !$acc parallel loop - do i = 1, m - y%v(i) = alpha * x(i) + beta * y%v(i) - end do - call y%set_host() - end subroutine d_oacc_axpby_a - - subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) - use psi_serial_mod - implicit none - integer(psb_ipk_), intent(in) :: m - class(psb_d_base_vect_type), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - class(psb_d_vect_oacc), intent(inout) :: z - real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta - integer(psb_ipk_), intent(out) :: info - integer(psb_ipk_) :: nx, ny, nz, i - logical :: gpu_done - - info = psb_success_ - gpu_done = .false. - - select type(xx => x) - class is (psb_d_vect_oacc) - select type(yy => y) - class is (psb_d_vect_oacc) - select type(zz => z) - class is (psb_d_vect_oacc) - if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() - if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() - if (xx%is_host()) call xx%sync_space() - nx = size(xx%v) - ny = size(yy%v) - nz = size(zz%v) - if ((nx < m) .or. (ny < m) .or. (nz < m)) then - info = psb_err_internal_error_ - else - !$acc parallel loop - do i = 1, m - yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) - zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) - end do - end if - call yy%set_dev() - call zz%set_dev() - gpu_done = .true. - end select - end select - end select - - if (.not. gpu_done) then - if (x%is_host()) call x%sync() - if (y%is_host()) call y%sync() - if (z%is_host()) call z%sync() - call y%axpby(m, alpha, x, beta, info) - call z%axpby(m, gamma, y, delta, info) - end if - end subroutine d_oacc_abgdxyz - - - subroutine d_oacc_sctb_buf(i, n, idx, beta, y) - use psb_base_mod - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - real(psb_dpk_) :: beta - class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info - - if (.not.allocated(y%combuf)) then - call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') - return - end if - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - if (y%is_host()) call y%sync_space() - - !$acc parallel loop - do i = 1, n - y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) - end do - - class default - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) - end do - end select - end subroutine d_oacc_sctb_buf - - subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) - use psb_base_mod - implicit none - integer(psb_ipk_):: i, n - class(psb_i_base_vect_type) :: idx - real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info, ni - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - class default - call psb_errpush(info, 'd_oacc_sctb_x') - return - end select - - if (y%is_host()) call y%sync_space() - - !$acc parallel loop - do i = 1, n - y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) - end do - - call y%set_dev() - end subroutine d_oacc_sctb_x - - - - subroutine d_oacc_sctb(n, idx, x, beta, y) - use psb_base_mod - implicit none - integer(psb_ipk_) :: n - integer(psb_ipk_) :: idx(:) - real(psb_dpk_) :: beta, x(:) - class(psb_d_vect_oacc) :: y - integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - if (n == 0) return - if (y%is_dev()) call y%sync_space() - - !$acc parallel loop - do i = 1, n - y%v(idx(i)) = beta * y%v(idx(i)) + x(i) - end do - - call y%set_host() - end subroutine d_oacc_sctb - - - subroutine d_oacc_gthzbuf(i, n, idx, x) - use psb_base_mod - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type) :: idx - class(psb_d_vect_oacc) :: x - integer(psb_ipk_) :: info - - info = 0 - if (.not.allocated(x%combuf)) then - call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') - return - end if - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - class default - call psb_errpush(info, 'd_oacc_gthzbuf') - return - end select - - if (x%is_host()) call x%sync_space() - - !$acc parallel loop - do i = 1, n - x%combuf(i) = x%v(idx%v(i)) - end do - end subroutine d_oacc_gthzbuf - - subroutine d_oacc_gthzv_x(i, n, idx, x, y) - use psb_base_mod - implicit none - integer(psb_ipk_) :: i, n - class(psb_i_base_vect_type):: idx - real(psb_dpk_) :: y(:) - class(psb_d_vect_oacc):: x - integer(psb_ipk_) :: info - - info = 0 - - select type(ii => idx) - class is (psb_i_vect_oacc) - if (ii%is_host()) call ii%sync_space(info) - class default - call psb_errpush(info, 'd_oacc_gthzv_x') - return - end select - - if (x%is_host()) call x%sync_space() - - !$acc parallel loop - do i = 1, n - y(i) = x%v(idx%v(i)) - end do - end subroutine d_oacc_gthzv_x - - subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) - use psi_serial_mod - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - class(psb_i_base_vect_type), intent(inout) :: irl - class(psb_d_base_vect_type), intent(inout) :: val - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i, isz - logical :: done_oacc - - info = 0 - if (psb_errstatus_fatal()) return - - done_oacc = .false. - select type(virl => irl) - type is (psb_i_vect_oacc) - select type(vval => val) - type is (psb_d_vect_oacc) - if (vval%is_host()) call vval%sync_space() - if (virl%is_host()) call virl%sync_space(info) - if (x%is_host()) call x%sync_space() - !$acc parallel loop - do i = 1, n - x%v(virl%v(i)) = vval%v(i) - end do - call x%set_dev() - done_oacc = .true. - end select - end select - - if (.not.done_oacc) then - select type(virl => irl) - type is (psb_i_vect_oacc) - if (virl%is_dev()) call virl%sync_space(info) - end select - select type(vval => val) - type is (psb_d_vect_oacc) - if (vval%is_dev()) call vval%sync_space() - end select - call x%ins(n, irl%v, val%v, dupl, info) - end if - - if (info /= 0) then - call psb_errpush(info, 'oacc_vect_ins') - return - end if - - end subroutine d_oacc_ins_v - - - - subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) - use psi_serial_mod - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(in) :: n, dupl - integer(psb_ipk_), intent(in) :: irl(:) - real(psb_dpk_), intent(in) :: val(:) - integer(psb_ipk_), intent(out) :: info - - integer(psb_ipk_) :: i - - info = 0 - if (x%is_dev()) call x%sync_space() - call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) - call x%set_host() - !$acc update device(x%v) - - end subroutine d_oacc_ins_a - - - - subroutine d_oacc_bld_mn(x, n) - use psb_base_mod - implicit none - integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call x%all(n, info) - if (info /= 0) then - call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) - end if - call x%set_host() - !$acc update device(x%v) - - end subroutine d_oacc_bld_mn - - - subroutine d_oacc_bld_x(x, this) - use psb_base_mod - implicit none - real(psb_dpk_), intent(in) :: this(:) - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: info - - call psb_realloc(size(this), x%v, info) - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'd_oacc_bld_x', & - i_err=(/size(this), izero, izero, izero, izero/)) - return - end if - - x%v(:) = this(:) - call x%set_host() - !$acc update device(x%v) - - end subroutine d_oacc_bld_x - - - subroutine d_oacc_asb_m(n, x, info) - use psb_base_mod - implicit none - integer(psb_mpk_), intent(in) :: n - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - integer(psb_mpk_) :: nd - - info = psb_success_ - - if (x%is_dev()) then - nd = size(x%v) - if (nd < n) then - call x%sync() - call x%psb_d_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() - call x%set_host() - end if - else - if (size(x%v) < n) then - call x%psb_d_base_vect_type%asb(n, info) - if (info == psb_success_) call x%sync_space() - call x%set_host() - end if - end if - end subroutine d_oacc_asb_m - - - - subroutine d_oacc_set_scal(x, val, first, last) - class(psb_d_vect_oacc), intent(inout) :: x - real(psb_dpk_), intent(in) :: val - integer(psb_ipk_), optional :: first, last - - integer(psb_ipk_) :: first_, last_ - first_ = 1 - last_ = x%get_nrows() - if (present(first)) first_ = max(1, first) - if (present(last)) last_ = min(last, last_) - - !$acc parallel loop - do i = first_, last_ - x%v(i) = val - end do - !$acc end parallel loop - - call x%set_dev() - end subroutine d_oacc_set_scal - - - - subroutine d_oacc_zero(x) - use psi_serial_mod - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - call x%set_dev() - call x%set_scal(dzero) - end subroutine d_oacc_zero - - function d_oacc_get_nrows(x) result(res) - implicit none - class(psb_d_vect_oacc), intent(in) :: x - integer(psb_ipk_) :: res - - if (allocated(x%v)) res = size(x%v) - end function d_oacc_get_nrows - - function d_oacc_get_fmt() result(res) - implicit none - character(len=5) :: res - res = "dOACC" - - end function d_oacc_get_fmt - - function d_oacc_vect_dot(n, x, y) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - class(psb_d_base_vect_type), intent(inout) :: y - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_), external :: ddot - integer(psb_ipk_) :: info - integer(psb_ipk_) :: i - - res = dzero - - select type(yy => y) - type is (psb_d_base_vect_type) - if (x%is_dev()) call x%sync() - res = ddot(n, x%v, 1, yy%v, 1) - type is (psb_d_vect_oacc) - if (x%is_host()) call x%sync() - if (yy%is_host()) call yy%sync() - - !$acc parallel loop reduction(+:res) present(x%v, yy%v) - do i = 1, n - res = res + x%v(i) * yy%v(i) - end do - !$acc end parallel loop - - class default - call x%sync() - res = y%dot(n, x%v) - end select - - end function d_oacc_vect_dot - - - - - function d_oacc_dot_a(n, x, y) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - real(psb_dpk_), intent(in) :: y(:) - integer(psb_ipk_), intent(in) :: n - real(psb_dpk_) :: res - real(psb_dpk_), external :: ddot - - if (x%is_dev()) call x%sync() - res = ddot(n, y, 1, x%v, 1) - - end function d_oacc_dot_a - - ! subroutine d_oacc_set_vect(x,y) - ! implicit none - ! class(psb_d_vect_oacc), intent(inout) :: x - ! real(psb_dpk_), intent(in) :: y(:) - ! integer(psb_ipk_) :: info - - ! if (size(x%v) /= size(y)) then - ! call x%free(info) - ! call x%all(size(y),info) - ! end if - ! x%v(:) = y(:) - ! call x%set_host() - ! end subroutine d_oacc_set_vect - - subroutine d_oacc_to_dev(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update device(v) - end subroutine d_oacc_to_dev - - subroutine d_oacc_to_host(v) - implicit none - real(psb_dpk_) :: v(:) - !$acc update self(v) - end subroutine d_oacc_to_host - +!!$ subroutine d_oacc_absval2(x, y) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ integer(psb_ipk_) :: n +!!$ integer(psb_ipk_) :: i +!!$ +!!$ n = min(size(x%v), size(y%v)) +!!$ select type (yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ yy%v(i) = abs(x%v(i)) +!!$ end do +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ call x%psb_d_base_vect_type%absval(y) +!!$ end select +!!$ end subroutine d_oacc_absval2 +!!$ +!!$ subroutine d_oacc_scal(alpha, x) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: alpha +!!$ integer(psb_ipk_) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, size(x%v) +!!$ x%v(i) = alpha * x%v(i) +!!$ end do +!!$ call x%set_dev() +!!$ end subroutine d_oacc_scal +!!$ +!!$ function d_oacc_nrm2(n, x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ real(psb_dpk_) :: sum +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ sum = 0.0 +!!$ !$acc parallel loop reduction(+:sum) +!!$ do i = 1, n +!!$ sum = sum + x%v(i) * x%v(i) +!!$ end do +!!$ res = sqrt(sum) +!!$ end function d_oacc_nrm2 +!!$ +!!$ function d_oacc_amax(n, x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ real(psb_dpk_) :: max_val +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ max_val = -huge(0.0) +!!$ !$acc parallel loop reduction(max:max_val) +!!$ do i = 1, n +!!$ if (x%v(i) > max_val) max_val = x%v(i) +!!$ end do +!!$ res = max_val +!!$ end function d_oacc_amax +!!$ +!!$ function d_oacc_asum(n, x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ integer(psb_ipk_) :: info +!!$ real(psb_dpk_) :: sum +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ sum = 0.0 +!!$ !$acc parallel loop reduction(+:sum) +!!$ do i = 1, n +!!$ sum = sum + abs(x%v(i)) +!!$ end do +!!$ res = sum +!!$ end function d_oacc_asum +!!$ +!!$ +!!$ subroutine d_oacc_mlt_v(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ n = min(x%get_nrows(), y%get_nrows()) +!!$ select type(xx => x) +!!$ type is (psb_d_base_vect_type) +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(i) = y%v(i) * xx%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end select +!!$ end subroutine d_oacc_mlt_v +!!$ +!!$ subroutine d_oacc_mlt_a(x, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (y%is_dev()) call y%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, size(x) +!!$ y%v(i) = y%v(i) * x(i) +!!$ end do +!!$ call y%set_host() +!!$ end subroutine d_oacc_mlt_a +!!$ +!!$ subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i, n +!!$ +!!$ info = 0 +!!$ if (z%is_dev()) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, size(x) +!!$ z%v(i) = alpha * x(i) * y(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end subroutine d_oacc_mlt_a_2 +!!$ +!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy) +!!$ use psi_serial_mod +!!$ use psb_string_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ class(psb_d_vect_oacc), intent(inout) :: z +!!$ integer(psb_ipk_), intent(out) :: info +!!$ character(len=1), intent(in), optional :: conjgx, conjgy +!!$ integer(psb_ipk_) :: i, n +!!$ logical :: conjgx_, conjgy_ +!!$ +!!$ conjgx_ = .false. +!!$ conjgy_ = .false. +!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C') +!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C') +!!$ +!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows()) +!!$ +!!$ info = 0 +!!$ select type(xx => x) +!!$ class is (psb_d_vect_oacc) +!!$ select type (yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ if (xx%is_host()) call xx%sync_space() +!!$ if (yy%is_host()) call yy%sync_space() +!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_dev() +!!$ class default +!!$ if (xx%is_dev()) call xx%sync_space() +!!$ if (yy%is_dev()) call yy%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ class default +!!$ if (x%is_dev()) call x%sync() +!!$ if (y%is_dev()) call y%sync() +!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i) +!!$ end do +!!$ call z%set_host() +!!$ end select +!!$ end subroutine d_oacc_mlt_v_2 +!!$ +!!$ +!!$ subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny, i +!!$ +!!$ info = psb_success_ +!!$ +!!$ select type(xx => x) +!!$ type is (psb_d_vect_oacc) +!!$ if ((beta /= dzero) .and. y%is_host()) call y%sync_space() +!!$ if (xx%is_host()) call xx%sync_space() +!!$ nx = size(xx%v) +!!$ ny = size(y%v) +!!$ if ((nx < m) .or. (ny < m)) then +!!$ info = psb_err_internal_error_ +!!$ else +!!$ !$acc parallel loop +!!$ do i = 1, m +!!$ y%v(i) = alpha * xx%v(i) + beta * y%v(i) +!!$ end do +!!$ end if +!!$ call y%set_dev() +!!$ class default +!!$ if ((alpha /= dzero) .and. (x%is_dev())) call x%sync() +!!$ call y%axpby(m, alpha, x%v, beta, info) +!!$ end select +!!$ end subroutine d_oacc_axpby_v +!!$ +!!$ subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ real(psb_dpk_), intent(in) :: x(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: y +!!$ real(psb_dpk_), intent(in) :: alpha, beta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if ((beta /= dzero) .and. (y%is_dev())) call y%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, m +!!$ y%v(i) = alpha * x(i) + beta * y%v(i) +!!$ end do +!!$ call y%set_host() +!!$ end subroutine d_oacc_axpby_a +!!$ +!!$ subroutine d_oacc_abgdxyz(m, alpha, beta, gamma, delta, x, y, z, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: m +!!$ class(psb_d_base_vect_type), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ class(psb_d_vect_oacc), intent(inout) :: z +!!$ real(psb_dpk_), intent(in) :: alpha, beta, gamma, delta +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_ipk_) :: nx, ny, nz, i +!!$ logical :: gpu_done +!!$ +!!$ info = psb_success_ +!!$ gpu_done = .false. +!!$ +!!$ select type(xx => x) +!!$ class is (psb_d_vect_oacc) +!!$ select type(yy => y) +!!$ class is (psb_d_vect_oacc) +!!$ select type(zz => z) +!!$ class is (psb_d_vect_oacc) +!!$ if ((beta /= dzero) .and. yy%is_host()) call yy%sync_space() +!!$ if ((delta /= dzero) .and. zz%is_host()) call zz%sync_space() +!!$ if (xx%is_host()) call xx%sync_space() +!!$ nx = size(xx%v) +!!$ ny = size(yy%v) +!!$ nz = size(zz%v) +!!$ if ((nx < m) .or. (ny < m) .or. (nz < m)) then +!!$ info = psb_err_internal_error_ +!!$ else +!!$ !$acc parallel loop +!!$ do i = 1, m +!!$ yy%v(i) = alpha * xx%v(i) + beta * yy%v(i) +!!$ zz%v(i) = gamma * yy%v(i) + delta * zz%v(i) +!!$ end do +!!$ end if +!!$ call yy%set_dev() +!!$ call zz%set_dev() +!!$ gpu_done = .true. +!!$ end select +!!$ end select +!!$ end select +!!$ +!!$ if (.not. gpu_done) then +!!$ if (x%is_host()) call x%sync() +!!$ if (y%is_host()) call y%sync() +!!$ if (z%is_host()) call z%sync() +!!$ call y%axpby(m, alpha, x, beta, info) +!!$ call z%axpby(m, gamma, y, delta, info) +!!$ end if +!!$ end subroutine d_oacc_abgdxyz +!!$ +!!$ +!!$ subroutine d_oacc_sctb_buf(i, n, idx, beta, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_vect_type) :: idx +!!$ real(psb_dpk_) :: beta +!!$ class(psb_d_vect_oacc) :: y +!!$ integer(psb_ipk_) :: info +!!$ +!!$ if (.not.allocated(y%combuf)) then +!!$ call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf') +!!$ return +!!$ end if +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ if (y%is_host()) call y%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i) +!!$ end do +!!$ +!!$ class default +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i) +!!$ end do +!!$ end select +!!$ end subroutine d_oacc_sctb_buf +!!$ +!!$ subroutine d_oacc_sctb_x(i, n, idx, x, beta, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_):: i, n +!!$ class(psb_i_base_vect_type) :: idx +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_vect_oacc) :: y +!!$ integer(psb_ipk_) :: info, ni +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ class default +!!$ call psb_errpush(info, 'd_oacc_sctb_x') +!!$ return +!!$ end select +!!$ +!!$ if (y%is_host()) call y%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i) +!!$ end do +!!$ +!!$ call y%set_dev() +!!$ end subroutine d_oacc_sctb_x +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_sctb(n, idx, x, beta, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: n +!!$ integer(psb_ipk_) :: idx(:) +!!$ real(psb_dpk_) :: beta, x(:) +!!$ class(psb_d_vect_oacc) :: y +!!$ integer(psb_ipk_) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ if (n == 0) return +!!$ if (y%is_dev()) call y%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y%v(idx(i)) = beta * y%v(idx(i)) + x(i) +!!$ end do +!!$ +!!$ call y%set_host() +!!$ end subroutine d_oacc_sctb +!!$ +!!$ +!!$ subroutine d_oacc_gthzbuf(i, n, idx, x) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_vect_type) :: idx +!!$ class(psb_d_vect_oacc) :: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ info = 0 +!!$ if (.not.allocated(x%combuf)) then +!!$ call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf') +!!$ return +!!$ end if +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ class default +!!$ call psb_errpush(info, 'd_oacc_gthzbuf') +!!$ return +!!$ end select +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ x%combuf(i) = x%v(idx%v(i)) +!!$ end do +!!$ end subroutine d_oacc_gthzbuf +!!$ +!!$ subroutine d_oacc_gthzv_x(i, n, idx, x, y) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_ipk_) :: i, n +!!$ class(psb_i_base_vect_type):: idx +!!$ real(psb_dpk_) :: y(:) +!!$ class(psb_d_vect_oacc):: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ info = 0 +!!$ +!!$ select type(ii => idx) +!!$ class is (psb_i_vect_oacc) +!!$ if (ii%is_host()) call ii%sync_space(info) +!!$ class default +!!$ call psb_errpush(info, 'd_oacc_gthzv_x') +!!$ return +!!$ end select +!!$ +!!$ if (x%is_host()) call x%sync_space() +!!$ +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ y(i) = x%v(idx%v(i)) +!!$ end do +!!$ end subroutine d_oacc_gthzv_x +!!$ +!!$ subroutine d_oacc_ins_v(n, irl, val, dupl, x, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n, dupl +!!$ class(psb_i_base_vect_type), intent(inout) :: irl +!!$ class(psb_d_base_vect_type), intent(inout) :: val +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i, isz +!!$ logical :: done_oacc +!!$ +!!$ info = 0 +!!$ if (psb_errstatus_fatal()) return +!!$ +!!$ done_oacc = .false. +!!$ select type(virl => irl) +!!$ type is (psb_i_vect_oacc) +!!$ select type(vval => val) +!!$ type is (psb_d_vect_oacc) +!!$ if (vval%is_host()) call vval%sync_space() +!!$ if (virl%is_host()) call virl%sync_space(info) +!!$ if (x%is_host()) call x%sync_space() +!!$ !$acc parallel loop +!!$ do i = 1, n +!!$ x%v(virl%v(i)) = vval%v(i) +!!$ end do +!!$ call x%set_dev() +!!$ done_oacc = .true. +!!$ end select +!!$ end select +!!$ +!!$ if (.not.done_oacc) then +!!$ select type(virl => irl) +!!$ type is (psb_i_vect_oacc) +!!$ if (virl%is_dev()) call virl%sync_space(info) +!!$ end select +!!$ select type(vval => val) +!!$ type is (psb_d_vect_oacc) +!!$ if (vval%is_dev()) call vval%sync_space() +!!$ end select +!!$ call x%ins(n, irl%v, val%v, dupl, info) +!!$ end if +!!$ +!!$ if (info /= 0) then +!!$ call psb_errpush(info, 'oacc_vect_ins') +!!$ return +!!$ end if +!!$ +!!$ end subroutine d_oacc_ins_v +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_ins_a(n, irl, val, dupl, x, info) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(in) :: n, dupl +!!$ integer(psb_ipk_), intent(in) :: irl(:) +!!$ real(psb_dpk_), intent(in) :: val(:) +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ integer(psb_ipk_) :: i +!!$ +!!$ info = 0 +!!$ if (x%is_dev()) call x%sync_space() +!!$ call x%psb_d_base_vect_type%ins(n, irl, val, dupl, info) +!!$ call x%set_host() +!!$ !$acc update device(x%v) +!!$ +!!$ end subroutine d_oacc_ins_a +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_bld_mn(x, n) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_mpk_), intent(in) :: n +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ call x%all(n, info) +!!$ if (info /= 0) then +!!$ call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/)) +!!$ end if +!!$ call x%set_host() +!!$ !$acc update device(x%v) +!!$ +!!$ end subroutine d_oacc_bld_mn +!!$ +!!$ +!!$ subroutine d_oacc_bld_x(x, this) +!!$ use psb_base_mod +!!$ implicit none +!!$ real(psb_dpk_), intent(in) :: this(:) +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_) :: info +!!$ +!!$ call psb_realloc(size(this), x%v, info) +!!$ if (info /= 0) then +!!$ info = psb_err_alloc_request_ +!!$ call psb_errpush(info, 'd_oacc_bld_x', & +!!$ i_err=(/size(this), izero, izero, izero, izero/)) +!!$ return +!!$ end if +!!$ +!!$ x%v(:) = this(:) +!!$ call x%set_host() +!!$ !$acc update device(x%v) +!!$ +!!$ end subroutine d_oacc_bld_x +!!$ +!!$ +!!$ subroutine d_oacc_asb_m(n, x, info) +!!$ use psb_base_mod +!!$ implicit none +!!$ integer(psb_mpk_), intent(in) :: n +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(out) :: info +!!$ integer(psb_mpk_) :: nd +!!$ +!!$ info = psb_success_ +!!$ +!!$ if (x%is_dev()) then +!!$ nd = size(x%v) +!!$ if (nd < n) then +!!$ call x%sync() +!!$ call x%psb_d_base_vect_type%asb(n, info) +!!$ if (info == psb_success_) call x%sync_space() +!!$ call x%set_host() +!!$ end if +!!$ else +!!$ if (size(x%v) < n) then +!!$ call x%psb_d_base_vect_type%asb(n, info) +!!$ if (info == psb_success_) call x%sync_space() +!!$ call x%set_host() +!!$ end if +!!$ end if +!!$ end subroutine d_oacc_asb_m +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_set_scal(x, val, first, last) +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: val +!!$ integer(psb_ipk_), optional :: first, last +!!$ +!!$ integer(psb_ipk_) :: first_, last_ +!!$ first_ = 1 +!!$ last_ = x%get_nrows() +!!$ if (present(first)) first_ = max(1, first) +!!$ if (present(last)) last_ = min(last, last_) +!!$ +!!$ !$acc parallel loop +!!$ do i = first_, last_ +!!$ x%v(i) = val +!!$ end do +!!$ !$acc end parallel loop +!!$ +!!$ call x%set_dev() +!!$ end subroutine d_oacc_set_scal +!!$ +!!$ +!!$ +!!$ subroutine d_oacc_zero(x) +!!$ use psi_serial_mod +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ call x%set_dev() +!!$ call x%set_scal(dzero) +!!$ end subroutine d_oacc_zero +!!$ +!!$ function d_oacc_get_nrows(x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(in) :: x +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (allocated(x%v)) res = size(x%v) +!!$ end function d_oacc_get_nrows +!!$ +!!$ function d_oacc_get_fmt() result(res) +!!$ implicit none +!!$ character(len=5) :: res +!!$ res = "dOACC" +!!$ +!!$ end function d_oacc_get_fmt +!!$ +!!$ function d_oacc_vect_dot(n, x, y) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ class(psb_d_base_vect_type), intent(inout) :: y +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ integer(psb_ipk_) :: info +!!$ integer(psb_ipk_) :: i +!!$ +!!$ res = dzero +!!$ +!!$ select type(yy => y) +!!$ type is (psb_d_base_vect_type) +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n, x%v, 1, yy%v, 1) +!!$ type is (psb_d_vect_oacc) +!!$ if (x%is_host()) call x%sync() +!!$ if (yy%is_host()) call yy%sync() +!!$ +!!$ !$acc parallel loop reduction(+:res) present(x%v, yy%v) +!!$ do i = 1, n +!!$ res = res + x%v(i) * yy%v(i) +!!$ end do +!!$ !$acc end parallel loop +!!$ +!!$ class default +!!$ call x%sync() +!!$ res = y%dot(n, x%v) +!!$ end select +!!$ +!!$ end function d_oacc_vect_dot +!!$ +!!$ +!!$ +!!$ +!!$ function d_oacc_dot_a(n, x, y) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ real(psb_dpk_), intent(in) :: y(:) +!!$ integer(psb_ipk_), intent(in) :: n +!!$ real(psb_dpk_) :: res +!!$ real(psb_dpk_), external :: ddot +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = ddot(n, y, 1, x%v, 1) +!!$ +!!$ end function d_oacc_dot_a +!!$ +!!$ ! subroutine d_oacc_set_vect(x,y) +!!$ ! implicit none +!!$ ! class(psb_d_vect_oacc), intent(inout) :: x +!!$ ! real(psb_dpk_), intent(in) :: y(:) +!!$ ! integer(psb_ipk_) :: info +!!$ +!!$ ! if (size(x%v) /= size(y)) then +!!$ ! call x%free(info) +!!$ ! call x%all(size(y),info) +!!$ ! end if +!!$ ! x%v(:) = y(:) +!!$ ! call x%set_host() +!!$ ! end subroutine d_oacc_set_vect +!!$ +!!$ subroutine d_oacc_to_dev(v) +!!$ implicit none +!!$ real(psb_dpk_) :: v(:) +!!$ !$acc update device(v) +!!$ end subroutine d_oacc_to_dev +!!$ +!!$ subroutine d_oacc_to_host(v) +!!$ implicit none +!!$ real(psb_dpk_) :: v(:) +!!$ !$acc update self(v) +!!$ end subroutine d_oacc_to_host +!!$ subroutine d_oacc_sync_space(x) implicit none class(psb_d_vect_oacc), intent(inout) :: x @@ -871,96 +869,96 @@ contains res = (x%state == is_sync) end function d_oacc_is_sync - - subroutine d_oacc_vect_all(n, x, info) - use psi_serial_mod - use psb_realloc_mod - implicit none - integer(psb_ipk_), intent(in) :: n - class(psb_d_vect_oacc), intent(out) :: x - integer(psb_ipk_), intent(out) :: info - - call psb_realloc(n, x%v, info) - if (info == 0) then - call x%set_host() - !$acc enter data create(x%v) - call x%sync_space() - end if - if (info /= 0) then - info = psb_err_alloc_request_ - call psb_errpush(info, 'd_oacc_all', & - i_err=(/n, n, n, n, n/)) - end if - end subroutine d_oacc_vect_all - - - subroutine d_oacc_vect_free(x, info) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_), intent(out) :: info - info = 0 - if (allocated(x%v)) then - !$acc exit data delete(x%v) finalize - deallocate(x%v, stat=info) - end if - - end subroutine d_oacc_vect_free - - function oacc_get_size(x) result(res) - implicit none - class(psb_d_vect_oacc), intent(inout) :: x - integer(psb_ipk_) :: res - - if (x%is_dev()) call x%sync() - res = size(x%v) - end function oacc_get_size - - - subroutine initialize(N) - integer(psb_ipk_) :: N - integer(psb_ipk_) :: i - allocate(v1(N),v2(N),p(N)) - !$acc enter data create(v1,v2,p) - !$acc parallel - !$acc loop - do i=1,n - v1(i) = i - v2(i) = n+i - end do - !$acc end parallel - end subroutine initialize - subroutine finalize_dev() - !$acc exit data delete(v1,v2,p) - end subroutine finalize_dev - subroutine finalize_host() - deallocate(v1,v2,p) - end subroutine finalize_host - subroutine to_dev() - !$acc update device(v1,v2) - end subroutine to_dev - subroutine to_host() - !$acc update self(v1,v2) - end subroutine to_host - function d_dot(N) result(res) - real(kind(1.d0)) :: res - integer(psb_ipk_) :: i,N - real(kind(1.d0)) :: t1,t2,t3 - res = 0.0d0 - !$acc parallel - !$acc loop reduction(+:res) - do i=1,N - res = res + v1(i) * v2(i) - end do - !$acc end parallel - - end function d_dot - function h_dot(N) result(res) - integer(psb_ipk_) :: i,N - real(kind(1.d0)) :: t1,t2,t3,res - res = 0.0d0 - do i=1,N - res = res + v1(i) * v2(i) - end do - end function h_dot - -end module psb_d_oacc_vect_mod \ No newline at end of file +!!$ +!!$ subroutine d_oacc_vect_all(n, x, info) +!!$ use psi_serial_mod +!!$ use psb_realloc_mod +!!$ implicit none +!!$ integer(psb_ipk_), intent(in) :: n +!!$ class(psb_d_vect_oacc), intent(out) :: x +!!$ integer(psb_ipk_), intent(out) :: info +!!$ +!!$ call psb_realloc(n, x%v, info) +!!$ if (info == 0) then +!!$ call x%set_host() +!!$ !$acc enter data create(x%v) +!!$ call x%sync_space() +!!$ end if +!!$ if (info /= 0) then +!!$ info = psb_err_alloc_request_ +!!$ call psb_errpush(info, 'd_oacc_all', & +!!$ i_err=(/n, n, n, n, n/)) +!!$ end if +!!$ end subroutine d_oacc_vect_all +!!$ +!!$ +!!$ subroutine d_oacc_vect_free(x, info) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_), intent(out) :: info +!!$ info = 0 +!!$ if (allocated(x%v)) then +!!$ !$acc exit data delete(x%v) finalize +!!$ deallocate(x%v, stat=info) +!!$ end if +!!$ +!!$ end subroutine d_oacc_vect_free +!!$ +!!$ function oacc_get_size(x) result(res) +!!$ implicit none +!!$ class(psb_d_vect_oacc), intent(inout) :: x +!!$ integer(psb_ipk_) :: res +!!$ +!!$ if (x%is_dev()) call x%sync() +!!$ res = size(x%v) +!!$ end function oacc_get_size +!!$ +!!$ +!!$ subroutine initialize(N) +!!$ integer(psb_ipk_) :: N +!!$ integer(psb_ipk_) :: i +!!$ allocate(v1(N),v2(N),p(N)) +!!$ !$acc enter data create(v1,v2,p) +!!$ !$acc parallel +!!$ !$acc loop +!!$ do i=1,n +!!$ v1(i) = i +!!$ v2(i) = n+i +!!$ end do +!!$ !$acc end parallel +!!$ end subroutine initialize +!!$ subroutine finalize_dev() +!!$ !$acc exit data delete(v1,v2,p) +!!$ end subroutine finalize_dev +!!$ subroutine finalize_host() +!!$ deallocate(v1,v2,p) +!!$ end subroutine finalize_host +!!$ subroutine to_dev() +!!$ !$acc update device(v1,v2) +!!$ end subroutine to_dev +!!$ subroutine to_host() +!!$ !$acc update self(v1,v2) +!!$ end subroutine to_host +!!$ function d_dot(N) result(res) +!!$ real(kind(1.d0)) :: res +!!$ integer(psb_ipk_) :: i,N +!!$ real(kind(1.d0)) :: t1,t2,t3 +!!$ res = 0.0d0 +!!$ !$acc parallel +!!$ !$acc loop reduction(+:res) +!!$ do i=1,N +!!$ res = res + v1(i) * v2(i) +!!$ end do +!!$ !$acc end parallel +!!$ +!!$ end function d_dot +!!$ function h_dot(N) result(res) +!!$ integer(psb_ipk_) :: i,N +!!$ real(kind(1.d0)) :: t1,t2,t3,res +!!$ res = 0.0d0 +!!$ do i=1,N +!!$ res = res + v1(i) * v2(i) +!!$ end do +!!$ end function h_dot +!!$ +end module psb_d_oacc_vect_mod