Compare commits

..

7 Commits

@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a
OACCD=@OACCD@
OACCLD=@OACCLD@
FCOPENACC=@FCOPENACC@
CCOPENACC=@CCOPENACC@
CXXOPENACC=@CXXOPENACC@
CUDAD=@CUDAD@
CUDALD=@CUDALD@
LCUDA=@LCUDA@
@ -82,6 +88,7 @@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu
.cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<

@ -1,6 +1,6 @@
include Make.inc
all: dirs based precd kryld utild cbindd extd $(CUDAD) libd
all: dirs based precd kryld utild cbindd extd $(CUDAD) $(OACCD) libd
@echo "====================================="
@echo "PSBLAS libraries Compilation Successful."
@ -14,9 +14,10 @@ utild: based
kryld: precd
extd: based
cudad: extd
oaccd: extd
cbindd: based precd kryld utild
libd: based precd kryld utild cbindd extd $(CUDALD)
libd: based precd kryld utild cbindd extd $(CUDALD) $(OACCLD)
$(MAKE) -C base lib
$(MAKE) -C prec lib
$(MAKE) -C krylov lib
@ -25,6 +26,8 @@ libd: based precd kryld utild cbindd extd $(CUDALD)
$(MAKE) -C ext lib
cudald: cudad
$(MAKE) -C cuda lib
oaccld: oaccd
$(MAKE) -C openacc lib
based:
@ -41,6 +44,8 @@ extd: based
$(MAKE) -C ext objs
cudad: based extd
$(MAKE) -C cuda objs
oaccd: based extd
$(MAKE) -C openacc objs
install: all
@ -67,6 +72,7 @@ clean:
$(MAKE) -C cbind clean
$(MAKE) -C ext clean
$(MAKE) -C cuda clean
$(MAKE) -C openacc clean
check: all
make check -C test/serial
@ -84,6 +90,7 @@ veryclean: cleanlib
cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean
cd openacc && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean

@ -2267,3 +2267,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_ARG_OPENACC
dnl
dnl Test for --enable-openacc
dnl
dnl
dnl
dnl Example use:
dnl
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_OPENACC],
[AC_MSG_CHECKING([whether we want openacc ])
AC_ARG_ENABLE(openacc,
AS_HELP_STRING([--enable-openacc],
[Specify whether to enable openacc. ]),
[
pac_cv_openacc="yes";
]
dnl ,
dnl [pac_cv_openacc="no";]
)
if test x"$pac_cv_openacc" == x"yes" ; then
AC_MSG_RESULT([yes.])
# AC_LANG_PUSH([Fortran])
# AC_OPENACC()
# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C])
# AC_OPENACC()
# pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C++])
# AC_OPENACC()
# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
# AC_LANG_POP()
else
pac_cv_openacc="no";
AC_MSG_RESULT([no.])
fi
]
)

123
configure vendored

@ -667,6 +667,11 @@ CUDA_DIR
EXTRALDLIBS
SPGPU_LIBS
SPGPU_FLAGS
CXXOPENACC
CCOPENACC
FCOPENACC
OACCLD
OACCD
METISINCFILE
UTILLIBNAME
METHDLIBNAME
@ -840,6 +845,10 @@ with_amdincdir
with_amdlibdir
with_cuda
with_cudacc
enable_openacc
with_ccopenacc
with_cxxopenacc
with_fcopenacc
'
ac_precious_vars='build_alias
host_alias
@ -1490,6 +1499,7 @@ Optional Features:
--disable-silent-rules verbose build output (undo: "make V=0")
--enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP
--enable-openacc Specify whether to enable openacc.
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@ -1535,6 +1545,12 @@ Optional Packages:
--with-cuda=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75
--with-ccopenacc additional [CCOPENACC] flags to be added: will
prepend to [CCOPENACC]
--with-cxxopenacc additional [CXXOPENACC] flags to be added: will
prepend to [CXXOPENACC]
--with-fcopenacc additional [FCOPENACC] flags to be added: will
prepend to [FCOPENACC]
Some influential environment variables:
FC Fortran compiler command
@ -10867,6 +10883,106 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDA_LIBS="";
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
printf %s "checking whether we want openacc ... " >&6; }
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
pac_cv_openacc="yes";
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
printf "%s\n" "yes." >&6; }
# AC_LANG_PUSH([Fortran])
# AC_OPENACC()
# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C])
# AC_OPENACC()
# pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C++])
# AC_OPENACC()
# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
# AC_LANG_POP()
else
pac_cv_openacc="no";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
printf "%s\n" "no." >&6; }
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-ccopenacc was given.
if test ${with_ccopenacc+y}
then :
withval=$with_ccopenacc;
CCOPENACC="${withval} ${CCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-cxxopenacc was given.
if test ${with_cxxopenacc+y}
then :
withval=$with_cxxopenacc;
CXXOPENACC="${withval} ${CXXOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-fcopenacc was given.
if test ${with_fcopenacc+y}
then :
withval=$with_fcopenacc;
FCOPENACC="${withval} ${FCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
@ -10957,6 +11073,13 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)

@ -843,6 +843,20 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDA_LIBS="";
fi
PAC_ARG_OPENACC()
if test x"$pac_cv_openacc" == x"yes" ; then
PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
@ -927,6 +941,12 @@ AC_SUBST(PRECLIBNAME)
AC_SUBST(METHDLIBNAME)
AC_SUBST(UTILLIBNAME)
AC_SUBST(METISINCFILE)
AC_SUBST(OACCD)
AC_SUBST(OACCLD)
AC_SUBST(FCOPENACC)
AC_SUBST(CCOPENACC)
AC_SUBST(CXXOPENACC)
AC_SUBST(SPGPU_FLAGS)
AC_SUBST(SPGPU_LIBS)
dnl AC_SUBST(SPGPU_DIR)
@ -944,6 +964,7 @@ AC_SUBST(CUDEFINES)
AC_SUBST(CUDAD)
AC_SUBST(CUDALD)
AC_SUBST(LCUDA)
###############################################################################
# the following files will be created by Automake

@ -1,48 +1,63 @@
.SUFFIXES:
.SUFFIXES: .F90 .f90 .o .s .c
include ../Make.inc
# Compilers and flags
CC=mpicc
FC=mpif90
FCOPT=-O0 -march=native
OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
#CC=mpicc
#FC=mpif90
#FCOPT=-O0 -march=native
#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
# Directories
LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
IMPLDIR=./impl # Adding the impl directory
IMPLDIR=./impl
# Include and library paths
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR) -I$(IMPLDIR)
LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR)
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
CINCLUDES=
#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
# Source files
FOBJS= psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_oacc_mod.o psb_d_oacc_csr_mat_mod.o \
impl/psb_d_oacc_csr_vect_mv.o
psb_oacc_env_mod.o
# Library name
LIBNAME=libpsb_openacc.a
# Rules
all: $(LIBNAME)
OBJS=$(COBJS) $(FOBJS)
$(LIBNAME): $(FOBJS)
ar cr $(LIBNAME) $(FOBJS)
# Rules
lib: objs ilib
ar cur $(LIBNAME) $(OBJS)
/bin/cp -p $(LIBNAME) $(LIBDIR)
clean:
/bin/rm -fr *.o $(LIBNAME) *.mod impl/*.o
objs: $(OBJS) iobjs
/bin/cp -p *$(.mod) $(MODDIR)
.f90.o:
$(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
iobjs: $(OBJS)
$(MAKE) -C impl objs
.c.o:
$(CC) -c $< -o $@
ilib: $(OBJS)
$(MAKE) -C impl lib
.F90.o:
$(FC) $(FCOPT) $(OFFLOAD) $(INCLUDES) -c $< -o $@
psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_d_oacc_csr_mat_mod.o psb_oacc_env_mod.o
.F90.s:
$(FC) $(FCOPT) $(INCLUDES) -c -S $< -o $@
clean: cclean iclean
/bin/rm -f $(FOBJS) *$(.mod) *.a
veryclean: clean
cclean:
/bin/rm -f $(COBJS)
iclean:
$(MAKE) -C impl clean
.c.o:
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -0,0 +1,29 @@
include ../../Make.inc
LIBDIR=../../lib
INCDIR=../../include
MODDIR=../../modules
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
LIBNAME=libpsb_openacc.a
OBJS= psb_d_oacc_csr_vect_mv.o psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o
objs: $(OBJS)
lib: objs
ar cur ../$(LIBNAME) $(OBJS)
clean:
/bin/rm -f $(OBJS)
.c.o:
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -0,0 +1,31 @@
subroutine d_oacc_mlt_v(x, y, info)
use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, n
info = 0
n = min(x%get_nrows(), y%get_nrows())
select type(xx => x)
class is (psb_d_vect_oacc)
if (y%is_host()) call y%sync()
if (xx%is_host()) call xx%sync()
!$acc parallel loop
do i = 1, n
y%v(i) = y%v(i) * xx%v(i)
end do
call y%set_dev()
class default
if (xx%is_dev()) call xx%sync()
if (y%is_dev()) call y%sync()
do i = 1, n
y%v(i) = y%v(i) * xx%v(i)
end do
call y%set_host()
end select
end subroutine d_oacc_mlt_v

@ -0,0 +1,53 @@
subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
use psb_d_oacc_vect_mod, psb_protect_name => d_oacc_mlt_v_2
use psb_string_mod
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
integer(psb_ipk_) :: i, n
logical :: conjgx_, conjgy_
conjgx_ = .false.
conjgy_ = .false.
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
select type(xx => x)
class is (psb_d_vect_oacc)
select type (yy => y)
class is (psb_d_vect_oacc)
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
if ((beta /= dzero) .and. (z%is_host())) call z%sync()
!$acc parallel loop
do i = 1, n
z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
end do
call z%set_dev()
class default
if (xx%is_dev()) call xx%sync()
if (yy%is_dev()) call yy%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
do i = 1, n
z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
end do
call z%set_host()
end select
class default
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
do i = 1, n
z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
end do
call z%set_host()
end select
end subroutine d_oacc_mlt_v_2

@ -3,7 +3,7 @@ module psb_d_oacc_csr_mat_mod
use iso_c_binding
use psb_d_mat_mod
use psb_d_oacc_vect_mod
use oaccsparse_mod
!use oaccsparse_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0

@ -3,13 +3,13 @@ module psb_d_oacc_vect_mod
use psb_const_mod
use psb_error_mod
use psb_d_vect_mod
use psb_i_oacc_vect_mod
use psb_i_vect_mod
use psb_i_oacc_vect_mod
integer(psb_ipk_), parameter, private :: is_host = -1
integer(psb_ipk_), parameter, private :: is_sync = 0
integer(psb_ipk_), parameter, private :: is_dev = 1
type, extends(psb_d_base_vect_type) :: psb_d_vect_oacc
integer :: state = is_host
@ -47,9 +47,9 @@ module psb_d_oacc_vect_mod
procedure, pass(y) :: axpby_v => d_oacc_axpby_v
procedure, pass(y) :: axpby_a => d_oacc_axpby_a
procedure, pass(z) :: abgdxyz => d_oacc_abgdxyz
procedure, pass(y) :: mlt_v => d_oacc_mlt_v
procedure, pass(y) :: mlt_a => d_oacc_mlt_a
procedure, pass(z) :: mlt_a_2 => d_oacc_mlt_a_2
procedure, pass(y) :: mlt_v => d_oacc_mlt_v
procedure, pass(z) :: mlt_v_2 => d_oacc_mlt_v_2
procedure, pass(x) :: scal => d_oacc_scal
procedure, pass(x) :: nrm2 => d_oacc_nrm2
@ -62,6 +62,30 @@ module psb_d_oacc_vect_mod
real(psb_dpk_), allocatable :: v1(:),v2(:),p(:)
interface
subroutine d_oacc_mlt_v(x, y, info)
import
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
end subroutine d_oacc_mlt_v
end interface
interface
subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
import
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
end subroutine d_oacc_mlt_v_2
end interface
contains
subroutine d_oacc_absval1(x)
@ -73,7 +97,7 @@ contains
n = size(x%v)
!$acc parallel loop
do i = 1, n
x%v(i) = abs(x%v(i))
x%v(i) = abs(x%v(i))
end do
call x%set_dev()
end subroutine d_oacc_absval1
@ -101,8 +125,6 @@ contains
end select
end subroutine d_oacc_absval2
subroutine d_oacc_scal(alpha, x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
@ -173,38 +195,7 @@ contains
end function d_oacc_asum
subroutine d_oacc_mlt_v(x, y, info)
use psi_serial_mod
implicit none
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_vect_oacc), intent(inout) :: y
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, n
info = 0
n = min(x%get_nrows(), y%get_nrows())
select type(xx => x)
type is (psb_d_base_vect_type)
if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
y%v(i) = y%v(i) * xx%v(i)
end do
call y%set_host()
class default
if (xx%is_dev()) call xx%sync()
if (y%is_dev()) call y%sync()
!$acc parallel loop
do i = 1, n
y%v(i) = y%v(i) * xx%v(i)
end do
call y%set_host()
end select
end subroutine d_oacc_mlt_v
subroutine d_oacc_mlt_a(x, y, info)
use psi_serial_mod
implicit none
real(psb_dpk_), intent(in) :: x(:)
class(psb_d_vect_oacc), intent(inout) :: y
@ -221,7 +212,6 @@ contains
end subroutine d_oacc_mlt_a
subroutine d_oacc_mlt_a_2(alpha, x, y, beta, z, info)
use psi_serial_mod
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
real(psb_dpk_), intent(in) :: x(:)
@ -239,64 +229,94 @@ contains
call z%set_host()
end subroutine d_oacc_mlt_a_2
subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
use psi_serial_mod
use psb_string_mod
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_base_vect_type), intent(inout) :: x
class(psb_d_base_vect_type), intent(inout) :: y
class(psb_d_vect_oacc), intent(inout) :: z
integer(psb_ipk_), intent(out) :: info
character(len=1), intent(in), optional :: conjgx, conjgy
integer(psb_ipk_) :: i, n
logical :: conjgx_, conjgy_
conjgx_ = .false.
conjgy_ = .false.
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
info = 0
select type(xx => x)
class is (psb_d_vect_oacc)
select type (yy => y)
class is (psb_d_vect_oacc)
if (xx%is_host()) call xx%sync_space()
if (yy%is_host()) call yy%sync_space()
if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
!$acc parallel loop
do i = 1, n
z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
end do
call z%set_dev()
class default
if (xx%is_dev()) call xx%sync_space()
if (yy%is_dev()) call yy%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
!$acc parallel loop
do i = 1, n
z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
end do
call z%set_host()
end select
class default
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
!$acc parallel loop
do i = 1, n
z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
end do
call z%set_host()
end select
end subroutine d_oacc_mlt_v_2
!!$ subroutine d_oacc_mlt_v(x, y, info)
!!$ implicit none
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_vect_oacc), intent(inout) :: y
!!$ integer(psb_ipk_), intent(out) :: info
!!$
!!$ integer(psb_ipk_) :: i, n
!!$
!!$ info = 0
!!$ n = min(x%get_nrows(), y%get_nrows())
!!$ select type(xx => x)
!!$ type is (psb_d_base_vect_type)
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ y%v(i) = y%v(i) * xx%v(i)
!!$ end do
!!$ call y%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v
!!$
!!$ subroutine d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
!!$ use psi_serial_mod
!!$ use psb_string_mod
!!$ implicit none
!!$ real(psb_dpk_), intent(in) :: alpha, beta
!!$ class(psb_d_base_vect_type), intent(inout) :: x
!!$ class(psb_d_base_vect_type), intent(inout) :: y
!!$ class(psb_d_vect_oacc), intent(inout) :: z
!!$ integer(psb_ipk_), intent(out) :: info
!!$ character(len=1), intent(in), optional :: conjgx, conjgy
!!$ integer(psb_ipk_) :: i, n
!!$ logical :: conjgx_, conjgy_
!!$
!!$ conjgx_ = .false.
!!$ conjgy_ = .false.
!!$ if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
!!$ if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
!!$
!!$ n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
!!$
!!$ info = 0
!!$ select type(xx => x)
!!$ class is (psb_d_vect_oacc)
!!$ select type (yy => y)
!!$ class is (psb_d_vect_oacc)
!!$ if (xx%is_host()) call xx%sync_space()
!!$ if (yy%is_host()) call yy%sync_space()
!!$ if ((beta /= dzero) .and. (z%is_host())) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_dev()
!!$ class default
!!$ if (xx%is_dev()) call xx%sync_space()
!!$ if (yy%is_dev()) call yy%sync()
!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * xx%v(i) * yy%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ class default
!!$ if (x%is_dev()) call x%sync()
!!$ if (y%is_dev()) call y%sync()
!!$ if ((beta /= dzero) .and. (z%is_dev())) call z%sync_space()
!!$ !$acc parallel loop
!!$ do i = 1, n
!!$ z%v(i) = alpha * x%v(i) * y%v(i) + beta * z%v(i)
!!$ end do
!!$ call z%set_host()
!!$ end select
!!$ end subroutine d_oacc_mlt_v_2
subroutine d_oacc_axpby_v(m, alpha, x, beta, y, info)
use psi_serial_mod
!use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
class(psb_d_base_vect_type), intent(inout) :: x
@ -329,7 +349,7 @@ contains
end subroutine d_oacc_axpby_v
subroutine d_oacc_axpby_a(m, alpha, x, beta, y, info)
use psi_serial_mod
!use psi_serial_mod
implicit none
integer(psb_ipk_), intent(in) :: m
real(psb_dpk_), intent(in) :: x(:)
@ -398,7 +418,6 @@ contains
end if
end subroutine d_oacc_abgdxyz
subroutine d_oacc_sctb_buf(i, n, idx, beta, y)
use psb_base_mod
implicit none
@ -409,25 +428,25 @@ contains
integer(psb_ipk_) :: info
if (.not.allocated(y%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
call psb_errpush(psb_err_alloc_dealloc_, 'sctb_buf')
return
end if
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync_space(info)
if (y%is_host()) call y%sync_space()
if (ii%is_host()) call ii%sync_space(info)
if (y%is_host()) call y%sync_space()
!$acc parallel loop
do i = 1, n
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
end do
!$acc parallel loop
do i = 1, n
y%v(ii%v(i)) = beta * y%v(ii%v(i)) + y%combuf(i)
end do
class default
!$acc parallel loop
do i = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
end do
!$acc parallel loop
do i = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + y%combuf(i)
end do
end select
end subroutine d_oacc_sctb_buf
@ -442,17 +461,17 @@ contains
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync_space(info)
if (ii%is_host()) call ii%sync_space(info)
class default
call psb_errpush(info, 'd_oacc_sctb_x')
return
call psb_errpush(info, 'd_oacc_sctb_x')
return
end select
if (y%is_host()) call y%sync_space()
!$acc parallel loop
do i = 1, n
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
y%v(idx%v(i)) = beta * y%v(idx%v(i)) + x(i)
end do
call y%set_dev()
@ -475,7 +494,7 @@ contains
!$acc parallel loop
do i = 1, n
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
y%v(idx(i)) = beta * y%v(idx(i)) + x(i)
end do
call y%set_host()
@ -492,23 +511,23 @@ contains
info = 0
if (.not.allocated(x%combuf)) then
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
call psb_errpush(psb_err_alloc_dealloc_, 'gthzbuf')
return
end if
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync_space(info)
if (ii%is_host()) call ii%sync_space(info)
class default
call psb_errpush(info, 'd_oacc_gthzbuf')
return
call psb_errpush(info, 'd_oacc_gthzbuf')
return
end select
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
x%combuf(i) = x%v(idx%v(i))
x%combuf(i) = x%v(idx%v(i))
end do
end subroutine d_oacc_gthzbuf
@ -522,20 +541,20 @@ contains
integer(psb_ipk_) :: info
info = 0
select type(ii => idx)
class is (psb_i_vect_oacc)
if (ii%is_host()) call ii%sync_space(info)
if (ii%is_host()) call ii%sync_space(info)
class default
call psb_errpush(info, 'd_oacc_gthzv_x')
return
call psb_errpush(info, 'd_oacc_gthzv_x')
return
end select
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
y(i) = x%v(idx%v(i))
y(i) = x%v(idx%v(i))
end do
end subroutine d_oacc_gthzv_x
@ -557,35 +576,35 @@ contains
done_oacc = .false.
select type(virl => irl)
type is (psb_i_vect_oacc)
select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_host()) call vval%sync_space()
if (virl%is_host()) call virl%sync_space(info)
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
call x%set_dev()
done_oacc = .true.
end select
select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_host()) call vval%sync_space()
if (virl%is_host()) call virl%sync_space(info)
if (x%is_host()) call x%sync_space()
!$acc parallel loop
do i = 1, n
x%v(virl%v(i)) = vval%v(i)
end do
call x%set_dev()
done_oacc = .true.
end select
end select
if (.not.done_oacc) then
select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync_space(info)
end select
select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_dev()) call vval%sync_space()
end select
call x%ins(n, irl%v, val%v, dupl, info)
select type(virl => irl)
type is (psb_i_vect_oacc)
if (virl%is_dev()) call virl%sync_space(info)
end select
select type(vval => val)
type is (psb_d_vect_oacc)
if (vval%is_dev()) call vval%sync_space()
end select
call x%ins(n, irl%v, val%v, dupl, info)
end if
if (info /= 0) then
call psb_errpush(info, 'oacc_vect_ins')
return
call psb_errpush(info, 'oacc_vect_ins')
return
end if
end subroutine d_oacc_ins_v
@ -622,7 +641,7 @@ contains
call x%all(n, info)
if (info /= 0) then
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
call psb_errpush(info, 'd_oacc_bld_mn', i_err=(/n, n, n, n, n/))
end if
call x%set_host()
!$acc update device(x%v)
@ -639,10 +658,10 @@ contains
call psb_realloc(size(this), x%v, info)
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'd_oacc_bld_x', &
i_err=(/size(this), izero, izero, izero, izero/))
return
info = psb_err_alloc_request_
call psb_errpush(info, 'd_oacc_bld_x', &
i_err=(/size(this), izero, izero, izero, izero/))
return
end if
x%v(:) = this(:)
@ -663,19 +682,19 @@ contains
info = psb_success_
if (x%is_dev()) then
nd = size(x%v)
if (nd < n) then
call x%sync()
call x%psb_d_base_vect_type%asb(n, info)
if (info == psb_success_) call x%sync_space()
call x%set_host()
end if
nd = size(x%v)
if (nd < n) then
call x%sync()
call x%psb_d_base_vect_type%asb(n, info)
if (info == psb_success_) call x%sync_space()
call x%set_host()
end if
else
if (size(x%v) < n) then
call x%psb_d_base_vect_type%asb(n, info)
if (info == psb_success_) call x%sync_space()
call x%set_host()
end if
if (size(x%v) < n) then
call x%psb_d_base_vect_type%asb(n, info)
if (info == psb_success_) call x%sync_space()
call x%set_host()
end if
end if
end subroutine d_oacc_asb_m
@ -685,19 +704,19 @@ contains
class(psb_d_vect_oacc), intent(inout) :: x
real(psb_dpk_), intent(in) :: val
integer(psb_ipk_), optional :: first, last
integer(psb_ipk_) :: first_, last_
first_ = 1
last_ = x%get_nrows()
if (present(first)) first_ = max(1, first)
if (present(last)) last_ = min(last, last_)
!$acc parallel loop
do i = first_, last_
x%v(i) = val
x%v(i) = val
end do
!$acc end parallel loop
call x%set_dev()
end subroutine d_oacc_set_scal
@ -830,21 +849,21 @@ contains
subroutine d_oacc_set_host(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
x%state = is_host
end subroutine d_oacc_set_host
subroutine d_oacc_set_dev(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
x%state = is_dev
end subroutine d_oacc_set_dev
subroutine d_oacc_set_sync(x)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
x%state = is_sync
end subroutine d_oacc_set_sync
@ -879,17 +898,17 @@ contains
integer(psb_ipk_), intent(in) :: n
class(psb_d_vect_oacc), intent(out) :: x
integer(psb_ipk_), intent(out) :: info
call psb_realloc(n, x%v, info)
if (info == 0) then
call x%set_host()
!$acc enter data create(x%v)
call x%sync_space()
call x%set_host()
!$acc enter data create(x%v)
call x%sync_space()
end if
if (info /= 0) then
info = psb_err_alloc_request_
call psb_errpush(info, 'd_oacc_all', &
i_err=(/n, n, n, n, n/))
info = psb_err_alloc_request_
call psb_errpush(info, 'd_oacc_all', &
i_err=(/n, n, n, n, n/))
end if
end subroutine d_oacc_vect_all
@ -903,64 +922,44 @@ contains
!$acc exit data delete(x%v) finalize
deallocate(x%v, stat=info)
end if
end subroutine d_oacc_vect_free
function oacc_get_size(x) result(res)
implicit none
class(psb_d_vect_oacc), intent(inout) :: x
integer(psb_ipk_) :: res
if (x%is_dev()) call x%sync()
res = size(x%v)
end function oacc_get_size
subroutine initialize(N)
integer(psb_ipk_) :: N
integer(psb_ipk_) :: i
allocate(v1(N),v2(N),p(N))
!$acc enter data create(v1,v2,p)
!$acc parallel
!$acc loop
do i=1,n
v1(i) = i
v2(i) = n+i
end do
!$acc end parallel
end subroutine initialize
subroutine finalize_dev()
!$acc exit data delete(v1,v2,p)
end subroutine finalize_dev
subroutine finalize_host()
deallocate(v1,v2,p)
end subroutine finalize_host
subroutine to_dev()
!$acc update device(v1,v2)
end subroutine to_dev
subroutine to_host()
!$acc update self(v1,v2)
end subroutine to_host
function d_dot(N) result(res)
real(kind(1.d0)) :: res
integer(psb_ipk_) :: i,N
real(kind(1.d0)) :: t1,t2,t3
res = 0.0d0
!$acc parallel
!$acc loop reduction(+:res)
do i=1,N
res = res + v1(i) * v2(i)
end do
!$acc end parallel
end function d_dot
function h_dot(N) result(res)
integer(psb_ipk_) :: i,N
real(kind(1.d0)) :: t1,t2,t3,res
res = 0.0d0
do i=1,N
res = res + v1(i) * v2(i)
end do
end function h_dot
end module psb_d_oacc_vect_mod
!!$
!!$ subroutine initialize(N)
!!$ integer(psb_ipk_) :: N
!!$ integer(psb_ipk_) :: i
!!$ allocate(v1(N),v2(N),p(N))
!!$ !$acc enter data create(v1,v2,p)
!!$ !$acc parallel
!!$ !$acc loop
!!$ do i=1,n
!!$ v1(i) = i
!!$ v2(i) = n+i
!!$ end do
!!$ !$acc end parallel
!!$ end subroutine initialize
!!$ subroutine finalize_dev()
!!$ !$acc exit data delete(v1,v2,p)
!!$ end subroutine finalize_dev
!!$ subroutine finalize_host()
!!$ deallocate(v1,v2,p)
!!$ end subroutine finalize_host
!!$ subroutine to_dev()
!!$ !$acc update device(v1,v2)
!!$ end subroutine to_dev
!!$ subroutine to_host()
!!$ !$acc update self(v1,v2)
!!$ end subroutine to_host
!!$
end module psb_d_oacc_vect_mod

@ -0,0 +1,18 @@
module psb_oacc_env_mod
contains
subroutine psb_oacc_init(ctxt, dev)
use psb_penv_mod
use psb_const_mod
use psb_error_mod
type(psb_ctxt_type), intent(in) :: ctxt
integer, intent(in), optional :: dev
end subroutine psb_oacc_init
subroutine psb_oacc_exit()
integer :: res
end subroutine psb_oacc_exit
end module psb_oacc_env_mod

@ -1,6 +1,8 @@
module psb_oacc_mod
use psb_const_mod
use psb_oacc_env_mod
use psb_d_oacc_vect_mod
use psb_d_oacc_csr_mat_mod

Loading…
Cancel
Save