Merge branch 'oacc_loloum' into repackage

repack-newsolve
sfilippone 5 months ago
commit 174a8e7aef

1
.gitignore vendored

@ -1,6 +1,7 @@
*.a *.a
*.o *.o
*.mod *.mod
*.smod
*~ *~
# header files generated # header files generated

@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a CBINDLIBNAME=libpsb_cbind.a
OACCD=@OACCD@
OACCLD=@OACCLD@
FCOPENACC=@FCOPENACC@
CCOPENACC=@CCOPENACC@
CXXOPENACC=@CXXOPENACC@
CUDAD=@CUDAD@ CUDAD=@CUDAD@
CUDALD=@CUDALD@ CUDALD=@CUDALD@
LCUDA=@LCUDA@ LCUDA=@LCUDA@
@ -82,6 +88,7 @@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
NVCC=@CUDA_NVCC@ NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@ CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu .SUFFIXES: .cu
.cu.o: .cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<

@ -1,6 +1,6 @@
include Make.inc include Make.inc
all: dirs based precd kryld utild cbindd extd $(CUDAD) libd all: dirs based precd kryld utild cbindd extd $(CUDAD) $(OACCD) libd
@echo "=====================================" @echo "====================================="
@echo "PSBLAS libraries Compilation Successful." @echo "PSBLAS libraries Compilation Successful."
@ -14,9 +14,10 @@ utild: based
kryld: precd kryld: precd
extd: based extd: based
cudad: extd cudad: extd
oaccd: extd
cbindd: based precd kryld utild cbindd: based precd kryld utild
libd: based precd kryld utild cbindd extd $(CUDALD) libd: based precd kryld utild cbindd extd $(CUDALD) $(OACCLD)
$(MAKE) -C base lib $(MAKE) -C base lib
$(MAKE) -C prec lib $(MAKE) -C prec lib
$(MAKE) -C krylov lib $(MAKE) -C krylov lib
@ -25,6 +26,8 @@ libd: based precd kryld utild cbindd extd $(CUDALD)
$(MAKE) -C ext lib $(MAKE) -C ext lib
cudald: cudad cudald: cudad
$(MAKE) -C cuda lib $(MAKE) -C cuda lib
oaccld: oaccd
$(MAKE) -C openacc lib
based: based:
@ -41,6 +44,8 @@ extd: based
$(MAKE) -C ext objs $(MAKE) -C ext objs
cudad: based extd cudad: based extd
$(MAKE) -C cuda objs $(MAKE) -C cuda objs
oaccd: based extd
$(MAKE) -C openacc objs
install: all install: all
@ -67,6 +72,7 @@ clean:
$(MAKE) -C cbind clean $(MAKE) -C cbind clean
$(MAKE) -C ext clean $(MAKE) -C ext clean
$(MAKE) -C cuda clean $(MAKE) -C cuda clean
$(MAKE) -C openacc clean
check: all check: all
make check -C test/serial make check -C test/serial
@ -84,6 +90,7 @@ veryclean: cleanlib
cd cbind && $(MAKE) veryclean cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean cd cuda && $(MAKE) veryclean
cd openacc && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean cd test/util && $(MAKE) clean

@ -104,6 +104,7 @@ module psb_c_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => c_vect_upd_xyz procedure, pass(z) :: upd_xyz => c_vect_upd_xyz
procedure, pass(z) :: xyzw => c_vect_xyzw procedure, pass(z) :: xyzw => c_vect_xyzw
procedure, pass(y) :: mlt_v => c_vect_mlt_v procedure, pass(y) :: mlt_v => c_vect_mlt_v
procedure, pass(y) :: mlt_a => c_vect_mlt_a procedure, pass(y) :: mlt_a => c_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => c_vect_mlt_a_2

@ -104,6 +104,7 @@ module psb_d_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => d_vect_upd_xyz procedure, pass(z) :: upd_xyz => d_vect_upd_xyz
procedure, pass(z) :: xyzw => d_vect_xyzw procedure, pass(z) :: xyzw => d_vect_xyzw
procedure, pass(y) :: mlt_v => d_vect_mlt_v procedure, pass(y) :: mlt_v => d_vect_mlt_v
procedure, pass(y) :: mlt_a => d_vect_mlt_a procedure, pass(y) :: mlt_a => d_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => d_vect_mlt_a_2

@ -104,6 +104,7 @@ module psb_s_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => s_vect_upd_xyz procedure, pass(z) :: upd_xyz => s_vect_upd_xyz
procedure, pass(z) :: xyzw => s_vect_xyzw procedure, pass(z) :: xyzw => s_vect_xyzw
procedure, pass(y) :: mlt_v => s_vect_mlt_v procedure, pass(y) :: mlt_v => s_vect_mlt_v
procedure, pass(y) :: mlt_a => s_vect_mlt_a procedure, pass(y) :: mlt_a => s_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => s_vect_mlt_a_2

@ -104,6 +104,7 @@ module psb_z_vect_mod
generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2 generic, public :: axpby => axpby_v, axpby_a, axpby_v2, axpby_a2
procedure, pass(z) :: upd_xyz => z_vect_upd_xyz procedure, pass(z) :: upd_xyz => z_vect_upd_xyz
procedure, pass(z) :: xyzw => z_vect_xyzw procedure, pass(z) :: xyzw => z_vect_xyzw
procedure, pass(y) :: mlt_v => z_vect_mlt_v procedure, pass(y) :: mlt_v => z_vect_mlt_v
procedure, pass(y) :: mlt_a => z_vect_mlt_a procedure, pass(y) :: mlt_a => z_vect_mlt_a
procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2 procedure, pass(z) :: mlt_a_2 => z_vect_mlt_a_2

@ -0,0 +1,104 @@
# AX_C_OPENACC
# ------------
# Find the option, if any, that makes the C compiler accept OpenACC.
#
# Results:
#   ax_cv_prog_c_openacc  cache variable holding the option that compiled
#                         and linked the probe program (empty when no option
#                         is needed), 'unsupported' when an option compiled
#                         but did not link, or 'not found' when no candidate
#                         compiled.
#   OPENACC_CFLAGS        the option to use at compile time (so the #pragmas
#                         are understood) and at link time (so the runtime
#                         library is linked in).  Left empty when no option
#                         is needed or when no working option was found.
#
# --disable-openacc skips the probe; OPENACC_CFLAGS is then left empty.
#
# For each candidate option a compile test is run first, then a link test.
# If the compile test succeeds but the link test fails, the option is taken
# to be the right one with an unusable runtime library, and the search stops.
#
# A compiler that does not know a candidate may interpret it as something
# else; "-openacc", for example, can be read as "-o penacc".
# NOTE(review): the working-directory guard (_AX_OPENACC_SAFE_WD) and the
# cleanup at the end of the probe use the file names 'penmp' and 'mp', which
# look inherited from an OpenMP probe -- confirm whether 'penacc' was meant.
AC_DEFUN([AX_C_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  AC_LANG_PUSH([C])
  dnl Clear the output variable after the language switch, so that the
  dnl language prefix always names the C variable.
  OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
  if test "$enable_openacc" != no; then
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidates, in order: no option at all, -fopenacc, -openacc, -acc.
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
],
          [AC_LINK_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Stop at the first candidate that at least compiled.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl The cache variable keeps the raw outcome of the loop; it is not
      dnl rewritten to other status words here, so that code reading it
      dnl directly keeps seeing the same values as before.
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before
      dnl the probe started, so if they exist now the probe created them.
      rm -f penmp mp])
    dnl Only a real option string may reach the flags variable; every status
    dnl word, including 'not found', leaves it empty.
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
  fi
  AC_LANG_POP([C])
])
# _AX_OPENACC_SAFE_WD
# -------------------
# AC_REQUIREd by the AX_*_OPENACC probe macros.  Refuses to continue when a
# file named 'penmp' or 'mp' exists in the current directory, because the
# probe macros run "rm -f penmp mp" once they have tried their candidate
# options.  The test is made twice: once through m4_syscmd while autoconf
# processes the macro (failing with m4_fatal), and once in the generated
# shell code (failing with AC_MSG_ERROR).
# NOTE(review): the names 'penmp' and 'mp' look inherited from an OpenMP
# probe; an unrecognised "-openacc" would rather produce 'penacc' -- confirm
# whether the guard and the cleanup should use that name instead.
AC_DEFUN([_AX_OPENACC_SAFE_WD],
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
To use AX_OPENACC you must not have either of these files
at the top level of your source tree.]))])]dnl
[if test -e penmp || test -e mp; then
AC_MSG_ERROR(m4_normalize(
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
Aborting configure because one of these files already exists.]))
fi])

@ -0,0 +1,104 @@
# AX_CXX_OPENACC
# --------------
# Find the option, if any, that makes the C++ compiler accept OpenACC.
#
# Results:
#   ax_cv_prog_cxx_openacc  cache variable holding the option that compiled
#                           and linked the probe program (empty when no
#                           option is needed), 'unsupported' when an option
#                           compiled but did not link, or 'not found' when
#                           no candidate compiled.
#   OPENACC_CXXFLAGS        the option to use at compile time (so the
#                           #pragmas are understood) and at link time (so
#                           the runtime library is linked in).  Left empty
#                           when no option is needed or when no working
#                           option was found.
#
# --disable-openacc skips the probe; OPENACC_CXXFLAGS is then left empty.
#
# For each candidate option a compile test is run first, then a link test.
# If the compile test succeeds but the link test fails, the option is taken
# to be the right one with an unusable runtime library, and the search stops.
#
# A compiler that does not know a candidate may interpret it as something
# else; "-openacc", for example, can be read as "-o penacc".
# NOTE(review): the working-directory guard (_AX_OPENACC_SAFE_WD) and the
# cleanup at the end of the probe use the file names 'penmp' and 'mp', which
# look inherited from an OpenMP probe -- confirm whether 'penacc' was meant.
AC_DEFUN([AX_CXX_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  AC_LANG_PUSH([C++])
  dnl Clear the output variable after the language switch: done earlier, the
  dnl language prefix would name the variable of whatever language was
  dnl current at the call site instead of the C++ one.
  OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
  if test "$enable_openacc" != no; then
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidates, in order: no option at all, -fopenacc, -openacc, -acc.
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
],
          [AC_LINK_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Stop at the first candidate that at least compiled.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl The cache variable keeps the raw outcome of the loop; it is not
      dnl rewritten to other status words here, so that code reading it
      dnl directly keeps seeing the same values as before.
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before
      dnl the probe started, so if they exist now the probe created them.
      rm -f penmp mp])
    dnl Only a real option string may reach the flags variable; every status
    dnl word, including 'not found', leaves it empty.
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
  fi
  AC_LANG_POP([C++])
])
dnl _AC_OPENACC_SAFE_WD
dnl ------------------
dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
dnl configure time for files that AC_OPENACC clobbers.
dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'.
dnl To use AX_OPENACC you must not have either of these files
dnl at the top level of your source tree.]))])]dnl
dnl [if test -e penmp || test -e mp; then
dnl AC_MSG_ERROR(m4_normalize(
dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
dnl Aborting configure because one of these files already exists.]))
dnl fi])

@ -0,0 +1,108 @@
# AX_FC_OPENACC
# -------------
# Find the option, if any, that makes the Fortran compiler accept OpenACC.
#
# Results:
#   ax_cv_prog_fc_openacc  cache variable holding the option that compiled
#                          and linked the probe program (empty when no
#                          option is needed), 'unsupported' when some option
#                          compiled but none linked, or 'not found' when no
#                          candidate compiled.
#   OPENACC_FCFLAGS        the option to use at compile time (so the
#                          directives are understood) and at link time (so
#                          the runtime library is linked in).  Left empty
#                          when no option is needed or when no working
#                          option was found.
#
# --disable-openacc skips the probe; OPENACC_FCFLAGS is then left empty.
#
# For each candidate option a compile test is run first, then a link test.
# The probe program has no compile-time guard on OpenACC being enabled, so a
# candidate may compile and then fail to link (presumably when the openacc
# module is found but the runtime library is not pulled in).  The search
# therefore goes on until a candidate both compiles and links.
#
# A compiler that does not know a candidate may interpret it as something
# else; "-openacc", for example, can be read as "-o penacc".
# NOTE(review): the working-directory guard (_AX_OPENACC_SAFE_WD) and the
# cleanup at the end of the probe use the file names 'penmp' and 'mp', which
# look inherited from an OpenMP probe -- confirm whether 'penacc' was meant.
AC_DEFUN([AX_FC_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  AC_LANG_PUSH([Fortran])
  dnl Clear the output variable after the language switch: done earlier, the
  dnl language prefix would name the variable of whatever language was
  dnl current at the call site instead of the Fortran one.
  OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
  if test "$enable_openacc" != no; then
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidates, in order: no option at all, -fopenacc, -openacc, -acc.
      dnl The probe source is indented by six blanks so that it is accepted
      dnl both as fixed-form and as free-form Fortran.
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
],
          [AC_LINK_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Leave the loop only when this candidate compiled and linked.
        dnl A compile failure keeps the previous status and a link failure
        dnl records 'unsupported'; in both cases the next candidate is tried.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found' && \
           test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported'; then
          break
        fi
      done
      dnl The cache variable keeps the raw outcome of the loop; it is not
      dnl rewritten to other status words here, so that code reading it
      dnl directly keeps seeing the same values as before.
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before
      dnl the probe started, so if they exist now the probe created them.
      rm -f penmp mp])
    dnl Only a real option string may reach the flags variable; every status
    dnl word, including 'not found', leaves it empty.
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
  fi
  AC_LANG_POP([Fortran])
])
# _AX_OPENACC_SAFE_WD
# -------------------
# AC_REQUIREd by the AX_*_OPENACC probe macros.  Refuses to continue when a
# file named 'penmp' or 'mp' exists in the current directory, because the
# probe macros run "rm -f penmp mp" once they have tried their candidate
# options.  The test is made twice: once through m4_syscmd while autoconf
# processes the macro (failing with m4_fatal), and once in the generated
# shell code (failing with AC_MSG_ERROR).
# NOTE(review): the names 'penmp' and 'mp' look inherited from an OpenMP
# probe; an unrecognised "-openacc" would rather produce 'penacc' -- confirm
# whether the guard and the cleanup should use that name instead.
AC_DEFUN([_AX_OPENACC_SAFE_WD],
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
To use AX_OPENACC you must not have either of these files
at the top level of your source tree.]))])]dnl
[if test -e penmp || test -e mp; then
AC_MSG_ERROR(m4_normalize(
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
Aborting configure because one of these files already exists.]))
fi])

@ -2234,56 +2234,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo
[pac_cv_cudacc='']) [pac_cv_cudacc=''])
]) ])
dnl PAC_ARG_WITH_LIBRSB
dnl
dnl Handles --with-librsb=DIR.  DIR is stored in pac_cv_librsb_dir and the
dnl header DIR/rsb.h is looked for.  When the header is found the LIBRSB_*
dnl variables, RSB_LIBS, RSBLIBNAME and LRSB are set and -DHAVE_RSB is
dnl prepended to FDEFINES and CDEFINES.  LIBS and CPPFLAGS are restored to
dnl their incoming values before returning.
AC_DEFUN([PAC_ARG_WITH_LIBRSB],
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH([librsb],
  [AS_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
 --with-librsb=/opt/packages/librsb])],
  [pac_cv_librsb_dir=$withval],
  [pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
  LIBS="-L$pac_cv_librsb_dir $LIBS"
  RSB_INCLUDES="-I$pac_cv_librsb_dir"
  RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
  [pac_rsb_header_ok=yes],
  [pac_rsb_header_ok=no; RSB_INCLUDES=""])
dnl Use the single "=" operator: "==" is not accepted by every shell's test.
if test "x$pac_rsb_header_ok" = "xyes" ; then
  RSB_LIBS="-lrsb $RSB_LIBDIR"
  RSBLIBNAME="librsb.a";
  LIBRSB_DIR="$pac_cv_librsb_dir";
  LIBRSB_INCDIR="$LIBRSB_DIR";
  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
  LIBRSB_DEFINES="-DHAVE_RSB";
  LRSB=-lpsb_rsb
  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
  CDEFINES="$LIBRSB_DEFINES $CDEFINES";
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl dnl
@ -2336,3 +2286,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
])dnl ])dnl
dnl @synopsis PAC_ARG_OPENACC
dnl
dnl Test for --enable-openacc.
dnl
dnl When the option is given, pac_cv_openacc takes its value, so that
dnl --enable-openacc yields "yes" while --disable-openacc and
dnl --enable-openacc=no yield "no".  When the option is absent the incoming
dnl value of pac_cv_openacc is kept.  Any value other than "yes" is then
dnl normalised to "no".  This macro only records the choice; it does not
dnl probe the compilers for OpenACC options.
dnl
dnl Example use:
dnl
dnl   PAC_ARG_OPENACC
dnl   if test x"$pac_cv_openacc" = x"yes" ; then ... ; fi
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_OPENACC],
[AC_MSG_CHECKING([whether we want openacc ])
AC_ARG_ENABLE([openacc],
  [AS_HELP_STRING([--enable-openacc],
     [Specify whether to enable openacc. ])],
  [pac_cv_openacc="$enableval"])
dnl Use the single "=" operator: "==" is not accepted by every shell's test.
if test x"$pac_cv_openacc" = x"yes" ; then
  AC_MSG_RESULT([yes.])
else
  pac_cv_openacc="no";
  AC_MSG_RESULT([no.])
fi
])

458
configure vendored

@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE am__EXEEXT_TRUE
LTLIBOBJS LTLIBOBJS
LIBOBJS LIBOBJS
LRSB
LIBRSB_DEFINES
LIBRSB_DIR
LIBRSB_INCDIR
LIBRSB_INCLUDES
LIBRSB_LIBS
LCUDA LCUDA
CUDALD CUDALD
CUDAD CUDAD
@ -667,6 +673,11 @@ CUDA_DIR
EXTRALDLIBS EXTRALDLIBS
SPGPU_LIBS SPGPU_LIBS
SPGPU_FLAGS SPGPU_FLAGS
CXXOPENACC
CCOPENACC
FCOPENACC
OACCLD
OACCD
METISINCFILE METISINCFILE
UTILLIBNAME UTILLIBNAME
METHDLIBNAME METHDLIBNAME
@ -840,6 +851,11 @@ with_amdincdir
with_amdlibdir with_amdlibdir
with_cuda with_cuda
with_cudacc with_cudacc
enable_openacc
with_extraopenacc
with_ccopenacc
with_cxxopenacc
with_fcopenacc
' '
ac_precious_vars='build_alias ac_precious_vars='build_alias
host_alias host_alias
@ -1490,6 +1506,8 @@ Optional Features:
--disable-silent-rules verbose build output (undo: "make V=0") --disable-silent-rules verbose build output (undo: "make V=0")
--enable-openmp Specify whether to enable openmp. --enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP --disable-openmp do not use OpenMP
--enable-openacc Specify whether to enable openacc.
--disable-openacc do not use Openacc
Optional Packages: Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@ -1535,6 +1553,14 @@ Optional Packages:
--with-cuda=DIR Specify the CUDA install directory. --with-cuda=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for --with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75 example, --with-cudacc=50,60,70,75
--with-extraopenacc additional [EXTRAOPENACC] flags to be added: will
prepend to [EXTRAOPENACC]
--with-ccopenacc additional [CCOPENACC] flags to be added: will
prepend to [CCOPENACC]
--with-cxxopenacc additional [CXXOPENACC] flags to be added: will
prepend to [CXXOPENACC]
--with-fcopenacc additional [FCOPENACC] flags to be added: will
prepend to [FCOPENACC]
Some influential environment variables: Some influential environment variables:
FC Fortran compiler command FC Fortran compiler command
@ -5825,7 +5851,7 @@ fi
# we just gave the user the chance to append values to these variables # we just gave the user the chance to append values to these variables
###############################################################################
if test -n "$ac_tool_prefix"; then if test -n "$ac_tool_prefix"; then
@ -10868,6 +10894,407 @@ printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_
CUDA_LIBS=""; CUDA_LIBS="";
fi fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
printf %s "checking whether we want openacc ... " >&6; }
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
pac_cv_openacc="yes";
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
printf "%s\n" "yes." >&6; }
# AC_LANG_PUSH([Fortran])
# AC_OPENACC()
# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C])
# AC_OPENACC()
# pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C++])
# AC_OPENACC()
# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
# AC_LANG_POP()
else
pac_cv_openacc="no";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
printf "%s\n" "no." >&6; }
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-extraopenacc was given.
if test ${with_extraopenacc+y}
then :
withval=$with_extraopenacc;
EXTRAOPENACC="${withval} ${EXTRAOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5
printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
if test -e penmp || test -e mp; then
as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5
fi
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5
printf %s "checking for $CC option to support Openacc... " >&6; }
if test ${ax_cv_prog_c_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_c_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $ac_option"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ax_cv_prog_c_openacc=$ac_option
else $as_nop
ax_cv_prog_c_openacc='unsupported'
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
CFLAGS=$ac_save_CFLAGS
if test "$ax_cv_prog_c_openacc" != 'not found'; then
break
fi
done
if test "$ax_cv_prog_c_openacc" = 'not found'; then
ac_cv_prog_c_openacc='unsupported'
elif test "$ax_cv_prog_c_openacc" = ''; then
ac_cv_prog_c_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5
printf "%s\n" "$ax_cv_prog_c_openacc" >&6; }
if test "$ax_cv_prog_c_openacc" != 'unsupported' && \
test "$ax_cv_prog_c_openacc" != 'none needed'; then
OPENACC_CFLAGS="$ax_cv_prog_c_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
CCOPENACC="$ax_cv_prog_c_openacc";
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5
printf %s "checking for $CXX option to support Openacc... " >&6; }
if test ${ax_cv_prog_cxx_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_cxx_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $ac_option"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"
then :
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
_ACEOF
if ac_fn_cxx_try_link "$LINENO"
then :
ax_cv_prog_cxx_openacc=$ac_option
else $as_nop
ax_cv_prog_cxx_openacc='unsupported'
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
CXXFLAGS=$ac_save_CXXFLAGS
if test "$ax_cv_prog_cxx_openacc" != 'not found'; then
break
fi
done
if test "$ax_cv_prog_cxx_openacc" = 'not found'; then
ac_cv_prog_cxx_openacc='unsupported'
elif test "$ax_cv_prog_cxx_openacc" = ''; then
ac_cv_prog_cxx_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5
printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; }
if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \
test "$ax_cv_prog_cxx_openacc" != 'none needed'; then
OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
CXXOPENACC="$ax_cv_prog_cxx_openacc";
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5
printf %s "checking for $FC option to support Openacc... " >&6; }
if test ${ax_cv_prog_fc_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_fc_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_FCFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_option"
cat > conftest.$ac_ext <<_ACEOF
program main
use openacc
implicit none
integer tid, np
tid = 42
call acc_init(0)
end
_ACEOF
if ac_fn_fc_try_compile "$LINENO"
then :
cat > conftest.$ac_ext <<_ACEOF
program main
use openacc
implicit none
integer tid, np
tid = 42
call acc_init(0)
end
_ACEOF
if ac_fn_fc_try_link "$LINENO"
then :
ax_cv_prog_fc_openacc=$ac_option
else $as_nop
ax_cv_prog_fc_openacc='unsupported'
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
FCFLAGS=$ac_save_FCFLAGS
if test "$ax_cv_prog_fc_openacc" != 'unsupported'; then
break
fi
done
if test "$ax_cv_prog_fc_openacc" = 'not found'; then
ac_cv_prog_fc_openacc='unsupported'
elif test "$ax_cv_prog_fc_openacc" = ''; then
ac_cv_prog_fc_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5
printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; }
if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \
test "$ax_cv_prog_fc_openacc" != 'none needed'; then
OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
FCOPENACC="$ax_cv_prog_fc_openacc";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-ccopenacc was given.
if test ${with_ccopenacc+y}
then :
withval=$with_ccopenacc;
CCOPENACC="${withval} ${CCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-cxxopenacc was given.
if test ${with_cxxopenacc+y}
then :
withval=$with_cxxopenacc;
CXXOPENACC="${withval} ${CXXOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-fcopenacc was given.
if test ${with_fcopenacc+y}
then :
withval=$with_fcopenacc;
FCOPENACC="${withval} ${FCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
CCOPENACC="$CCOPENACC $EXTRAOPENACC";
CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
FCOPENACC="$FCOPENACC $EXTRAOPENACC";
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
###############################################################################
LIBRSB_DIR="$pac_cv_librsb_dir";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5
printf %s "checking for LIBRSB install dir... " >&6; }
case $LIBRSB_DIR in
/*) ;;
*) esac
pac_cv_status_file="$LIBRSB_DIR/librsb.a"
if test ! -f "$pac_cv_status_file" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
#AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.])
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5
printf "%s\n" "$LIBRSB_DIR" >&6; }
RSBTARGETLIB=rsbd;
RSBTARGETOBJ=rsbobj;
fi
@ -10958,6 +11385,19 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
@ -12299,6 +12739,9 @@ fi
CUDA : ${HAVE_CUDA} CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc} CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -12332,6 +12775,9 @@ printf "%s\n" "$as_me:
CUDA : ${HAVE_CUDA} CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc} CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -12348,6 +12794,16 @@ printf "%s\n" "$as_me:
If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise
type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}. type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}.
" >&6;} " >&6;}
if test x"${pac_cv_openacc}" == x"yes" ; then
if test x"${FCOPENACC}" == x ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}:
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= " >&5
printf "%s\n" "$as_me:
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= " >&6;}
fi
fi
############################################################################### ###############################################################################

@ -844,9 +844,47 @@ if test "x$pac_cv_ipk_size" != "x4"; then
CUDA_LIBS=""; CUDA_LIBS="";
fi fi
dnl ---------------------------------------------------------------------------
dnl OpenACC support.
dnl PAC_ARG_OPENACC sets pac_cv_openacc. When it is "yes" the compiler-specific
dnl OpenACC flags are probed with the AX_*_OPENACC macros, may be extended by
dnl the user via --with-ccopenacc/--with-cxxopenacc/--with-fcopenacc, and the
dnl value given with --with-extraopenacc is appended to all three.
dnl OACCD/OACCLD name the make targets that build the openacc subdirectory.
dnl ---------------------------------------------------------------------------
PAC_ARG_OPENACC()
dnl AC_ARG_ENABLE([openacc],
dnl [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])
dnl Note: POSIX test only defines "=" for string equality; "==" is a
dnl bashism and fails under shells such as dash.
if test x"$pac_cv_openacc" = x"yes" ; then
PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC)
dnl if test false; then
AX_C_OPENACC()
CCOPENACC="$ax_cv_prog_c_openacc";
AX_CXX_OPENACC()
CXXOPENACC="$ax_cv_prog_cxx_openacc";
AX_FC_OPENACC()
FCOPENACC="$ax_cv_prog_fc_openacc";
dnl AX_OPENACC()
dnl
dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";
dnl FCOPENACC="$ax_cv_prog_fc_openacc";
dnl else
dnl AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
dnl AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
dnl CCOPENACC="$ax_cv_prog_c_openacc";
dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";
dnl FCOPENACC="$ax_cv_prog_fc_openacc";
dnl fi
dnl Append the extra flags only when there are some: an unconditional
dnl "$X $EXTRAOPENACC" turns an empty flag variable into a single blank,
dnl which hides the "no FCOPENACC flag chosen" condition tested at the end
dnl of this script.
if test x"$EXTRAOPENACC" != x ; then
CCOPENACC="$CCOPENACC $EXTRAOPENACC";
CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
FCOPENACC="$FCOPENACC $EXTRAOPENACC";
fi
dnl AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
############################################################################### ###############################################################################
PAC_ARG_WITH_LIBRSB dnl PAC_ARG_WITH_LIBRSB()
LIBRSB_DIR="$pac_cv_librsb_dir"; LIBRSB_DIR="$pac_cv_librsb_dir";
AC_MSG_CHECKING([for LIBRSB install dir]) AC_MSG_CHECKING([for LIBRSB install dir])
case $LIBRSB_DIR in case $LIBRSB_DIR in
@ -952,6 +990,12 @@ AC_SUBST(PRECLIBNAME)
AC_SUBST(METHDLIBNAME) AC_SUBST(METHDLIBNAME)
AC_SUBST(UTILLIBNAME) AC_SUBST(UTILLIBNAME)
AC_SUBST(METISINCFILE) AC_SUBST(METISINCFILE)
AC_SUBST(OACCD)
AC_SUBST(OACCLD)
AC_SUBST(FCOPENACC)
AC_SUBST(CCOPENACC)
AC_SUBST(CXXOPENACC)
AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_FLAGS)
AC_SUBST(SPGPU_LIBS) AC_SUBST(SPGPU_LIBS)
dnl AC_SUBST(SPGPU_DIR) dnl AC_SUBST(SPGPU_DIR)
@ -976,7 +1020,6 @@ AC_SUBST(LIBRSB_DIR)
AC_SUBST(LIBRSB_DEFINES) AC_SUBST(LIBRSB_DEFINES)
AC_SUBST(LRSB) AC_SUBST(LRSB)
############################################################################### ###############################################################################
# the following files will be created by Automake # the following files will be created by Automake
@ -1004,6 +1047,11 @@ AC_MSG_NOTICE([
CUDA : ${HAVE_CUDA} CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc} CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -1023,6 +1071,13 @@ dnl Note : we should use LDLIBS sooner or later!
dnl To install the program and its documentation, run 'make install' if you are root, dnl To install the program and its documentation, run 'make install' if you are root,
dnl or run 'su -c "make install"' if you are not root. dnl or run 'su -c "make install"' if you are not root.
]) ])
dnl Warn when OpenACC is enabled but no Fortran OpenACC flag is available.
dnl The flag string is first collapsed with an unquoted expansion so that a
dnl value made only of blanks (e.g. the result of joining two empty strings
dnl with a space) is recognised as empty. "=" is used instead of "==" because
dnl only the former is defined for POSIX test.
pac_fcopenacc_nonblank=`printf '%s' ${FCOPENACC}`
if test x"${pac_cv_openacc}" = x"yes" ; then
if test x"${pac_fcopenacc_nonblank}" = x ; then
AC_MSG_NOTICE([
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= ])
fi
fi
############################################################################### ###############################################################################

@ -0,0 +1,89 @@
# Makefile for the OpenACC module layer (libpsb_openacc.a).
# Compiler names, option variables and the OpenACC flag variables
# (FCOPENACC, CCOPENACC, CXXOPENACC) are taken from ../Make.inc.
include ../Make.inc
#CC=mpicc
#FC=mpif90
#FCOPT=-O0 -march=native
#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
# Locations, relative to this directory, of the installed libraries,
# include files and Fortran module files; implementations live in ./impl.
LIBDIR=../lib
INCDIR=../include
MODDIR=../modules
IMPLDIR=./impl
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR)
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
CINCLUDES=
#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
# Fortran module objects built in this directory.
FOBJS= psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_mod.o psb_oacc_env_mod.o
LIBNAME=libpsb_openacc.a
# COBJS is not assigned in this file; unless Make.inc provides it,
# OBJS reduces to FOBJS.
OBJS=$(COBJS) $(FOBJS)
# lib: build the objects here and in impl, archive them and copy the
# archive into LIBDIR.
lib: objs ilib
ar cur $(LIBNAME) $(OBJS)
/bin/cp -p $(LIBNAME) $(LIBDIR)
# objs: compile the modules, build impl, then publish the module files.
# $(.mod) is not defined here; presumably it is the module-file extension
# set in Make.inc -- confirm there.
objs: $(OBJS) iobjs
/bin/cp -p *$(.mod) $(MODDIR)
# The impl directory is entered only after the module objects exist.
iobjs: $(OBJS)
$(MAKE) -C impl objs
ilib: $(OBJS)
$(MAKE) -C impl lib
# Build-order dependencies between module objects.
# psb_oacc_mod depends on every other module object.
psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
psb_oacc_env_mod.o
# Real/complex vector modules need the integer vector modules and the
# environment module.
psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_oacc_env_mod.o
psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_oacc_env_mod.o
psb_i_oacc_vect_mod.o: psb_oacc_env_mod.o
# Each matrix-format module needs the vector module of the same data type.
psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o
psb_d_oacc_csr_mat_mod.o psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o
psb_c_oacc_csr_mat_mod.o psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o: psb_c_oacc_vect_mod.o
psb_z_oacc_csr_mat_mod.o psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o: psb_z_oacc_vect_mod.o
# Cleanup: objects, module files, archives and submodule files here,
# plus whatever impl's own clean target removes.
clean: cclean iclean
/bin/rm -f $(FOBJS) *$(.mod) *.a *.smod
veryclean: clean
cclean:
/bin/rm -f $(COBJS)
iclean:
$(MAKE) -C impl clean
# Suffix rules: every compile line adds the language-specific OpenACC flags.
.c.o:
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -0,0 +1,285 @@
include ../../Make.inc
LIBDIR=../../lib
INCDIR=../../include
MODDIR=../../modules
UP=..
#
# Compilers and such
#
#CCOPT= -g
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
LIBNAME=libpsb_openacc.a
# Implementation objects archived into ../libpsb_openacc.a.
# Grouped by data type (s, d, c, z): first the CSR routines and the vector
# mlt kernels, then the ELL and HLL routines. Each object is listed exactly
# once (the z ELL/HLL group used to appear twice).
OBJS= \
psb_s_oacc_csr_vect_mv.o \
psb_s_oacc_csr_inner_vect_sv.o \
psb_s_oacc_csr_scals.o \
psb_s_oacc_csr_scal.o \
psb_s_oacc_csr_allocate_mnnz.o \
psb_s_oacc_csr_reallocate_nz.o \
psb_s_oacc_csr_cp_from_coo.o \
psb_s_oacc_csr_cp_from_fmt.o \
psb_s_oacc_csr_mv_from_coo.o \
psb_s_oacc_csr_mv_from_fmt.o \
psb_s_oacc_csr_mold.o \
psb_s_oacc_mlt_v_2.o \
psb_s_oacc_mlt_v.o \
psb_d_oacc_csr_vect_mv.o \
psb_d_oacc_csr_inner_vect_sv.o \
psb_d_oacc_csr_scals.o \
psb_d_oacc_csr_scal.o \
psb_d_oacc_csr_allocate_mnnz.o \
psb_d_oacc_csr_reallocate_nz.o \
psb_d_oacc_csr_cp_from_coo.o \
psb_d_oacc_csr_cp_from_fmt.o \
psb_d_oacc_csr_mv_from_coo.o \
psb_d_oacc_csr_mv_from_fmt.o \
psb_d_oacc_csr_mold.o \
psb_d_oacc_mlt_v_2.o \
psb_d_oacc_mlt_v.o \
psb_c_oacc_csr_vect_mv.o \
psb_c_oacc_csr_inner_vect_sv.o \
psb_c_oacc_csr_scals.o \
psb_c_oacc_csr_scal.o \
psb_c_oacc_csr_allocate_mnnz.o \
psb_c_oacc_csr_reallocate_nz.o \
psb_c_oacc_csr_cp_from_coo.o \
psb_c_oacc_csr_cp_from_fmt.o \
psb_c_oacc_csr_mv_from_coo.o \
psb_c_oacc_csr_mv_from_fmt.o \
psb_c_oacc_csr_mold.o \
psb_c_oacc_mlt_v_2.o \
psb_c_oacc_mlt_v.o \
psb_z_oacc_csr_vect_mv.o \
psb_z_oacc_csr_inner_vect_sv.o \
psb_z_oacc_csr_scals.o \
psb_z_oacc_csr_scal.o \
psb_z_oacc_csr_allocate_mnnz.o \
psb_z_oacc_csr_reallocate_nz.o \
psb_z_oacc_csr_cp_from_coo.o \
psb_z_oacc_csr_cp_from_fmt.o \
psb_z_oacc_csr_mv_from_coo.o \
psb_z_oacc_csr_mv_from_fmt.o \
psb_z_oacc_csr_mold.o \
psb_z_oacc_mlt_v_2.o \
psb_z_oacc_mlt_v.o \
psb_s_oacc_ell_vect_mv.o \
psb_s_oacc_ell_inner_vect_sv.o \
psb_s_oacc_ell_scals.o \
psb_s_oacc_ell_scal.o \
psb_s_oacc_ell_reallocate_nz.o \
psb_s_oacc_ell_allocate_mnnz.o \
psb_s_oacc_ell_cp_from_coo.o \
psb_s_oacc_ell_cp_from_fmt.o \
psb_s_oacc_ell_mv_from_coo.o \
psb_s_oacc_ell_mv_from_fmt.o \
psb_s_oacc_ell_mold.o \
psb_s_oacc_hll_mold.o \
psb_s_oacc_hll_mv_from_fmt.o \
psb_s_oacc_hll_mv_from_coo.o \
psb_s_oacc_hll_cp_from_fmt.o \
psb_s_oacc_hll_cp_from_coo.o \
psb_s_oacc_hll_allocate_mnnz.o \
psb_s_oacc_hll_reallocate_nz.o \
psb_s_oacc_hll_scal.o \
psb_s_oacc_hll_scals.o \
psb_s_oacc_hll_inner_vect_sv.o \
psb_s_oacc_hll_vect_mv.o \
psb_d_oacc_ell_vect_mv.o \
psb_d_oacc_ell_inner_vect_sv.o \
psb_d_oacc_ell_scals.o \
psb_d_oacc_ell_scal.o \
psb_d_oacc_ell_reallocate_nz.o \
psb_d_oacc_ell_allocate_mnnz.o \
psb_d_oacc_ell_cp_from_coo.o \
psb_d_oacc_ell_cp_from_fmt.o \
psb_d_oacc_ell_mv_from_coo.o \
psb_d_oacc_ell_mv_from_fmt.o \
psb_d_oacc_ell_mold.o \
psb_d_oacc_hll_mold.o \
psb_d_oacc_hll_mv_from_fmt.o \
psb_d_oacc_hll_mv_from_coo.o \
psb_d_oacc_hll_cp_from_fmt.o \
psb_d_oacc_hll_cp_from_coo.o \
psb_d_oacc_hll_allocate_mnnz.o \
psb_d_oacc_hll_reallocate_nz.o \
psb_d_oacc_hll_scal.o \
psb_d_oacc_hll_scals.o \
psb_d_oacc_hll_inner_vect_sv.o \
psb_d_oacc_hll_vect_mv.o \
psb_c_oacc_ell_vect_mv.o \
psb_c_oacc_ell_inner_vect_sv.o \
psb_c_oacc_ell_scals.o \
psb_c_oacc_ell_scal.o \
psb_c_oacc_ell_reallocate_nz.o \
psb_c_oacc_ell_allocate_mnnz.o \
psb_c_oacc_ell_cp_from_coo.o \
psb_c_oacc_ell_cp_from_fmt.o \
psb_c_oacc_ell_mv_from_coo.o \
psb_c_oacc_ell_mv_from_fmt.o \
psb_c_oacc_ell_mold.o \
psb_c_oacc_hll_mold.o \
psb_c_oacc_hll_mv_from_fmt.o \
psb_c_oacc_hll_mv_from_coo.o \
psb_c_oacc_hll_cp_from_fmt.o \
psb_c_oacc_hll_cp_from_coo.o \
psb_c_oacc_hll_allocate_mnnz.o \
psb_c_oacc_hll_reallocate_nz.o \
psb_c_oacc_hll_scal.o \
psb_c_oacc_hll_scals.o \
psb_c_oacc_hll_inner_vect_sv.o \
psb_c_oacc_hll_vect_mv.o \
psb_z_oacc_ell_vect_mv.o \
psb_z_oacc_ell_inner_vect_sv.o \
psb_z_oacc_ell_scals.o \
psb_z_oacc_ell_scal.o \
psb_z_oacc_ell_reallocate_nz.o \
psb_z_oacc_ell_allocate_mnnz.o \
psb_z_oacc_ell_cp_from_coo.o \
psb_z_oacc_ell_cp_from_fmt.o \
psb_z_oacc_ell_mv_from_coo.o \
psb_z_oacc_ell_mv_from_fmt.o \
psb_z_oacc_ell_mold.o \
psb_z_oacc_hll_mold.o \
psb_z_oacc_hll_mv_from_fmt.o \
psb_z_oacc_hll_mv_from_coo.o \
psb_z_oacc_hll_cp_from_fmt.o \
psb_z_oacc_hll_cp_from_coo.o \
psb_z_oacc_hll_allocate_mnnz.o \
psb_z_oacc_hll_reallocate_nz.o \
psb_z_oacc_hll_scal.o \
psb_z_oacc_hll_scals.o \
psb_z_oacc_hll_inner_vect_sv.o \
psb_z_oacc_hll_vect_mv.o
# objs compiles every implementation object; lib adds them to the archive
# that lives one directory up.
objs: $(OBJS)
lib: objs
ar cur ../$(LIBNAME) $(OBJS)
# Build-order dependencies. Every implementation object of a given data
# type and storage format depends on the matching matrix module object and
# on the vector module object of the same data type, both built in $(UP).
# --- single precision real (s): CSR, ELL, HLL
psb_s_oacc_csr_vect_mv.o psb_s_oacc_csr_inner_vect_sv.o \
psb_s_oacc_csr_scals.o \
psb_s_oacc_csr_scal.o psb_s_oacc_csr_allocate_mnnz.o \
psb_s_oacc_csr_reallocate_nz.o psb_s_oacc_csr_cp_from_coo.o \
psb_s_oacc_csr_cp_from_fmt.o psb_s_oacc_csr_mv_from_coo.o \
psb_s_oacc_csr_mv_from_fmt.o psb_s_oacc_csr_mold.o: $(UP)/psb_s_oacc_csr_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
psb_s_oacc_ell_vect_mv.o psb_s_oacc_ell_inner_vect_sv.o \
psb_s_oacc_ell_scals.o \
psb_s_oacc_ell_scal.o psb_s_oacc_ell_allocate_mnnz.o \
psb_s_oacc_ell_reallocate_nz.o psb_s_oacc_ell_cp_from_coo.o \
psb_s_oacc_ell_cp_from_fmt.o psb_s_oacc_ell_mv_from_coo.o \
psb_s_oacc_ell_mv_from_fmt.o psb_s_oacc_ell_mold.o: $(UP)/psb_s_oacc_ell_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
psb_s_oacc_hll_vect_mv.o psb_s_oacc_hll_inner_vect_sv.o \
psb_s_oacc_hll_scals.o \
psb_s_oacc_hll_scal.o psb_s_oacc_hll_allocate_mnnz.o \
psb_s_oacc_hll_reallocate_nz.o psb_s_oacc_hll_cp_from_coo.o \
psb_s_oacc_hll_cp_from_fmt.o psb_s_oacc_hll_mv_from_coo.o \
psb_s_oacc_hll_mv_from_fmt.o psb_s_oacc_hll_mold.o: $(UP)/psb_s_oacc_hll_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
# --- double precision real (d): CSR, ELL, HLL
psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
psb_d_oacc_csr_scals.o \
psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
psb_d_oacc_ell_vect_mv.o psb_d_oacc_ell_inner_vect_sv.o \
psb_d_oacc_ell_scals.o \
psb_d_oacc_ell_scal.o psb_d_oacc_ell_allocate_mnnz.o \
psb_d_oacc_ell_reallocate_nz.o psb_d_oacc_ell_cp_from_coo.o \
psb_d_oacc_ell_cp_from_fmt.o psb_d_oacc_ell_mv_from_coo.o \
psb_d_oacc_ell_mv_from_fmt.o psb_d_oacc_ell_mold.o: $(UP)/psb_d_oacc_ell_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
psb_d_oacc_hll_vect_mv.o psb_d_oacc_hll_inner_vect_sv.o \
psb_d_oacc_hll_scals.o \
psb_d_oacc_hll_scal.o psb_d_oacc_hll_allocate_mnnz.o \
psb_d_oacc_hll_reallocate_nz.o psb_d_oacc_hll_cp_from_coo.o \
psb_d_oacc_hll_cp_from_fmt.o psb_d_oacc_hll_mv_from_coo.o \
psb_d_oacc_hll_mv_from_fmt.o psb_d_oacc_hll_mold.o: $(UP)/psb_d_oacc_hll_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
# --- single precision complex (c): CSR, ELL, HLL
psb_c_oacc_csr_vect_mv.o psb_c_oacc_csr_inner_vect_sv.o \
psb_c_oacc_csr_scals.o \
psb_c_oacc_csr_scal.o psb_c_oacc_csr_allocate_mnnz.o \
psb_c_oacc_csr_reallocate_nz.o psb_c_oacc_csr_cp_from_coo.o \
psb_c_oacc_csr_cp_from_fmt.o psb_c_oacc_csr_mv_from_coo.o \
psb_c_oacc_csr_mv_from_fmt.o psb_c_oacc_csr_mold.o: $(UP)/psb_c_oacc_csr_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
psb_c_oacc_ell_vect_mv.o psb_c_oacc_ell_inner_vect_sv.o \
psb_c_oacc_ell_scals.o \
psb_c_oacc_ell_scal.o psb_c_oacc_ell_allocate_mnnz.o \
psb_c_oacc_ell_reallocate_nz.o psb_c_oacc_ell_cp_from_coo.o \
psb_c_oacc_ell_cp_from_fmt.o psb_c_oacc_ell_mv_from_coo.o \
psb_c_oacc_ell_mv_from_fmt.o psb_c_oacc_ell_mold.o: $(UP)/psb_c_oacc_ell_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
psb_c_oacc_hll_vect_mv.o psb_c_oacc_hll_inner_vect_sv.o \
psb_c_oacc_hll_scals.o \
psb_c_oacc_hll_scal.o psb_c_oacc_hll_allocate_mnnz.o \
psb_c_oacc_hll_reallocate_nz.o psb_c_oacc_hll_cp_from_coo.o \
psb_c_oacc_hll_cp_from_fmt.o psb_c_oacc_hll_mv_from_coo.o \
psb_c_oacc_hll_mv_from_fmt.o psb_c_oacc_hll_mold.o: $(UP)/psb_c_oacc_hll_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
# --- double precision complex (z): CSR, ELL, HLL
psb_z_oacc_csr_vect_mv.o psb_z_oacc_csr_inner_vect_sv.o \
psb_z_oacc_csr_scals.o \
psb_z_oacc_csr_scal.o psb_z_oacc_csr_allocate_mnnz.o \
psb_z_oacc_csr_reallocate_nz.o psb_z_oacc_csr_cp_from_coo.o \
psb_z_oacc_csr_cp_from_fmt.o psb_z_oacc_csr_mv_from_coo.o \
psb_z_oacc_csr_mv_from_fmt.o psb_z_oacc_csr_mold.o: $(UP)/psb_z_oacc_csr_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
psb_z_oacc_ell_vect_mv.o psb_z_oacc_ell_inner_vect_sv.o \
psb_z_oacc_ell_scals.o \
psb_z_oacc_ell_scal.o psb_z_oacc_ell_allocate_mnnz.o \
psb_z_oacc_ell_reallocate_nz.o psb_z_oacc_ell_cp_from_coo.o \
psb_z_oacc_ell_cp_from_fmt.o psb_z_oacc_ell_mv_from_coo.o \
psb_z_oacc_ell_mv_from_fmt.o psb_z_oacc_ell_mold.o: $(UP)/psb_z_oacc_ell_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
psb_z_oacc_hll_vect_mv.o psb_z_oacc_hll_inner_vect_sv.o \
psb_z_oacc_hll_scals.o \
psb_z_oacc_hll_scal.o psb_z_oacc_hll_allocate_mnnz.o \
psb_z_oacc_hll_reallocate_nz.o psb_z_oacc_hll_cp_from_coo.o \
psb_z_oacc_hll_cp_from_fmt.o psb_z_oacc_hll_mv_from_coo.o \
psb_z_oacc_hll_mv_from_fmt.o psb_z_oacc_hll_mold.o: $(UP)/psb_z_oacc_hll_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
# --- vector mlt kernels depend only on the vector module of their type
psb_s_oacc_mlt_v_2.o psb_s_oacc_mlt_v.o: $(UP)/psb_s_oacc_vect_mod.o
psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
psb_c_oacc_mlt_v_2.o psb_c_oacc_mlt_v.o: $(UP)/psb_c_oacc_vect_mod.o
psb_z_oacc_mlt_v_2.o psb_z_oacc_mlt_v.o: $(UP)/psb_z_oacc_vect_mod.o
# Remove the objects and the submodule files produced in this directory.
clean:
/bin/rm -f $(OBJS) *.smod
# Suffix rules: every compile line adds the language-specific OpenACC flags.
.c.o:
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -0,0 +1,29 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_allocate_mnnz_impl
  use psb_base_mod
contains
  !
  ! Allocate an m x n matrix through the parent CSR allocator, then call
  ! set_host() and sync_dev_space() on the object.
  !   m, n : requested dimensions, forwarded to the parent allocator
  !   a    : matrix being (re)allocated
  !   nz   : optional size hint, forwarded as-is (may be absent)
  ! The parent allocator takes no status argument here, so info keeps the
  ! value psb_success_ assigned at entry.
  !
  module subroutine psb_c_oacc_csr_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in) :: m, n
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional :: nz

    logical, parameter :: debug=.false.
    character(len=20) :: name='allocate_mnz'
    integer(psb_ipk_) :: err_act, info, nz_

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_c_csr_sparse_mat%allocate(m, n, nz)
    call a%set_host()
    call a%sync_dev_space()

    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_c_oacc_csr_allocate_mnnz
end submodule psb_c_oacc_csr_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_coo_impl
  use psb_base_mod
contains
  !
  ! Copy a COO matrix b into a: release the device space of a, let the
  ! parent CSR type perform the conversion, then call sync_dev_space(),
  ! set_host() and sync() on the result.
  ! info is psb_success_ on success; any non-zero status returned by the
  ! parent conversion is reported as psb_err_alloc_dealloc_.
  !
  module subroutine psb_c_oacc_csr_cp_from_coo(a, b, info)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_c_coo_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)

    if (info /= 0) then
      ! Conversion failed: nothing is rebuilt on the device side.
      info = psb_err_alloc_dealloc_
    else
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    end if
  end subroutine psb_c_oacc_csr_cp_from_coo
end submodule psb_c_oacc_csr_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Copy a matrix b of arbitrary storage format into a.
  ! A COO source is delegated to a%cp_from_coo; every other format goes
  ! through the parent CSR conversion, after which sync_dev_space(),
  ! set_host() and sync() are called. On a non-zero status from the
  ! parent conversion the routine returns with that status unchanged.
  !
  module subroutine psb_c_oacc_csr_cp_from_fmt(a, b, info)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_c_base_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type(b)
    type is (psb_c_coo_sparse_mat)
      call a%cp_from_coo(b, info)
    class default
      call a%free_dev_space()
      call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_c_oacc_csr_cp_from_fmt
end submodule psb_c_oacc_csr_cp_from_fmt_impl

@ -0,0 +1,83 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_inner_vect_sv_impl
  use psb_base_mod
contains
  !
  ! Solve step on encapsulated vectors, delegated to the parent CSR type's
  ! inner_spsm.
  !
  !   alpha, beta : scalar coefficients, forwarded to the parent routine
  !   a           : matrix; must be in the assembled state
  !   x, y        : input / input-output vectors
  !   info        : psb_success_ on success, psb_err_invalid_mat_state_ if
  !                 a is not assembled, psb_err_from_subroutine_ if the
  !                 parent solve reports an error
  !   trans       : optional transpose selector, forwarded unchanged
  !
  ! The previous device branch looped over size(a%val) and wrote y%v(i)
  ! for every stored coefficient. That is an element-wise product, not a
  ! solve, and it indexes y past its end whenever the matrix has more
  ! stored entries than y has elements. Until a real device kernel exists,
  ! every case uses the same path the original already used for the
  ! transposed and beta /= 0 cases: sync both vectors, run the parent
  ! implementation, and mark y as updated on the host.
  !
  module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
    complex(psb_spk_), intent(in) :: alpha, beta
    class(psb_c_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans

    integer(psb_ipk_) :: err_act
    ! len=* keeps the full routine name; a len=20 variable truncated it.
    character(len=*), parameter :: name = 'c_oacc_csr_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (.not.a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      goto 9999
    endif

    call x%sync()
    call y%sync()
    ! An absent optional trans is passed through to the optional dummy.
    call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
    call y%set_host()

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
      goto 9999
    endif

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return
  end subroutine psb_c_oacc_csr_inner_vect_sv
end submodule psb_c_oacc_csr_inner_vect_sv_impl

@ -0,0 +1,35 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mold_impl
  use psb_base_mod
contains
  !
  ! Make b an empty matrix of dynamic type psb_c_oacc_csr_sparse_mat.
  ! An already allocated b is freed and deallocated first. Any allocation
  ! or deallocation failure is reported as psb_err_alloc_dealloc_ and
  ! handed to the error handler; a itself is not read.
  !
  module subroutine psb_c_oacc_csr_mold(a, b, info)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
    class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out) :: info

    logical, parameter :: debug=.false.
    character(len=20) :: name='csr_mold'
    integer(psb_ipk_) :: err_act

    call psb_get_erraction(err_act)
    info = 0

    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    ! Only allocate the new object if the old one went away cleanly.
    if (info == 0) then
      allocate(psb_c_oacc_csr_sparse_mat :: b, stat=info)
    end if

    if (info /= psb_success_) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      call psb_error_handler(err_act)
    end if
  end subroutine psb_c_oacc_csr_mold
end submodule psb_c_oacc_csr_mold_impl

@ -0,0 +1,27 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_coo_impl
  use psb_base_mod
contains
  !
  ! Move the contents of a COO matrix b into a: release the device space
  ! of a, let the parent CSR type perform the move, then call
  ! sync_dev_space(), set_host() and sync() on the result.
  ! info is psb_success_ on success; any non-zero status returned by the
  ! parent routine is reported as psb_err_alloc_dealloc_.
  !
  module subroutine psb_c_oacc_csr_mv_from_coo(a, b, info)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_c_coo_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)

    if (info /= 0) then
      ! Move failed: nothing is rebuilt on the device side.
      info = psb_err_alloc_dealloc_
    else
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    end if
  end subroutine psb_c_oacc_csr_mv_from_coo
end submodule psb_c_oacc_csr_mv_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Move a matrix b of arbitrary storage format into a.
  ! A COO source is delegated to a%mv_from_coo; every other format goes
  ! through the parent CSR move, after which sync_dev_space(), set_host()
  ! and sync() are called. On a non-zero status from the parent move the
  ! routine returns with that status unchanged.
  !
  module subroutine psb_c_oacc_csr_mv_from_fmt(a, b, info)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_c_base_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type(b)
    type is (psb_c_coo_sparse_mat)
      call a%mv_from_coo(b, info)
    class default
      call a%free_dev_space()
      call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_c_oacc_csr_mv_from_fmt
end submodule psb_c_oacc_csr_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_reallocate_nz_impl
  use psb_base_mod
contains
  !
  ! Resize the storage of a to nz entries through the parent CSR
  ! reallocate, then call sync_dev_space() and set_host() on the object.
  ! The parent routine takes no status argument here, so info keeps the
  ! value psb_success_ assigned at entry.
  !
  module subroutine psb_c_oacc_csr_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in) :: nz
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a

    logical, parameter :: debug=.false.
    character(len=20) :: name='c_oacc_csr_reallocate_nz'
    integer(psb_ipk_) :: err_act, info

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_c_csr_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_c_oacc_csr_reallocate_nz
end submodule psb_c_oacc_csr_reallocate_nz_impl

@ -0,0 +1,53 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scal_impl
  use psb_base_mod
contains
  !
  ! Scale the stored coefficients of a by the entries of d, on the device.
  !
  !   d    : scaling vector; indexed by row for side 'L', by column index
  !          (a%ja) for side 'R'
  !   a    : matrix to scale; synchronised first if its current copy is on
  !          the host, and flagged with set_dev() afterwards
  !   info : always psb_success_ (no failing operation is checked here)
  !   side : optional, 'L' (rows) or 'R' (columns), case-insensitive.
  !          When absent, left scaling is applied.
  !          NOTE(review): left is believed to match the default of the
  !          parent CSR scal -- confirm against psb_c_csr_scal.
  !          Any other value leaves the coefficients untouched, as before.
  !
  ! Fixes with respect to the previous version:
  !  * side 'R' walked the row pointer a%irp with the column count as loop
  !    bound; the row pointer must be walked over the rows.
  !  * with side absent, val(i) was multiplied by d(i) for every stored
  !    entry i, reading d as if it had one element per nonzero.
  !  * d is a plain dummy array, so it is passed with copyin instead of
  !    being required to be present on the device already.
  !
  module subroutine psb_c_oacc_csr_scal(d, a, info, side)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    complex(psb_spk_), intent(in) :: d(:)
    integer(psb_ipk_), intent(out) :: info
    character, intent(in), optional :: side

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, nr
    character :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    if (a%is_host()) call a%sync()

    ! Loop bound evaluated once on the host, outside the compute region.
    nr = a%get_nrows()

    if (side_ == 'L') then
      !$acc parallel loop present(a) copyin(d)
      do i = 1, nr
        do j = a%irp(i), a%irp(i+1) - 1
          a%val(j) = a%val(j) * d(i)
        end do
      end do
    else if (side_ == 'R') then
      !$acc parallel loop present(a) copyin(d)
      do i = 1, nr
        do j = a%irp(i), a%irp(i+1) - 1
          a%val(j) = a%val(j) * d(a%ja(j))
        end do
      end do
    end if

    call a%set_dev()

    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_csr_scal
end submodule psb_c_oacc_csr_scal_impl

@ -0,0 +1,34 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scals_impl
  use psb_base_mod
contains
  !
  ! Multiply every stored coefficient of a by the scalar d inside an
  ! OpenACC parallel loop. The matrix is synchronised first when its
  ! current copy is flagged as being on the host, and set_dev() is called
  ! afterwards. info is always psb_success_.
  !
  module subroutine psb_c_oacc_csr_scals(d, a, info)
    implicit none
    class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
    complex(psb_spk_), intent(in) :: d
    integer(psb_ipk_), intent(out) :: info

    logical, parameter :: debug=.false.
    character(len=20) :: name='scal'
    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: k

    info = psb_success_
    call psb_erractionsave(err_act)

    if (a%is_host()) then
      call a%sync()
    end if

    !$acc parallel loop present(a)
    do k = 1, size(a%val)
      a%val(k) = a%val(k) * d
    end do

    call a%set_dev()

    call psb_erractionrestore(err_act)
  end subroutine psb_c_oacc_csr_scals
end submodule psb_c_oacc_csr_scals_impl

@ -0,0 +1,86 @@
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_vect_mv_impl
use psb_base_mod
contains
!
! Matrix-vector product on encapsulated vectors: y = alpha*op(A)*x + beta*y.
!
!   alpha, beta : scalar coefficients
!   a           : CSR matrix
!   x, y        : input / input-output vectors
!   info        : psb_success_, or psb_err_invalid_mat_state_ when x or y
!                 is shorter than the matrix dimensions require
!   trans       : optional; 'T' or 'C' (any case) selects the fallback path
!
! The OpenACC kernel is used only when the product is not transposed and
! both x and y are of class psb_c_vect_oacc. In every other case the
! parent CSR spmm is called on the vectors' v components.
!
module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
complex(psb_spk_), intent(in) :: alpha, beta
class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
class(psb_c_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n
character :: trans_
logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
! Reject vectors that are too short for an m x n product.
if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
device_done = .false.
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (.not.tra) then
! Device path: requires both vectors to be OpenACC vectors. If either
! select type finds no match, device_done stays .false. and the
! fallback below runs instead.
select type(xx => x)
class is (psb_c_vect_oacc)
select type (yy => y)
class is (psb_c_vect_oacc)
! Bring matrix and vectors up to date before running the kernel.
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
call y%set_dev()
device_done = .true.
end select
end select
end if
if (.not.device_done) then
! Fallback: parent CSR product on the v components; y is then flagged
! with set_host().
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%psb_c_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
call y%set_host()
end if
contains
! CSR product kernel: for each row i, y(i) = alpha*sum_j val(j)*x(ja(j))
! + beta*y(i), with j running over irp(i) .. irp(i+1)-1.
! Rows are processed in chunks of vsz; the chunk loop carries the
! parallel directive and the row loop inside a chunk is marked independent.
! n is accepted but not used in the body.
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, n
complex(psb_spk_), intent(in) :: alpha, beta
complex(psb_spk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), irp(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, ii, isz
complex(psb_spk_) :: tmp
integer(psb_ipk_), parameter :: vsz = 256
info = 0
!$acc parallel loop vector_length(vsz) private(isz)
do ii = 1, m, vsz
! The last chunk may hold fewer than vsz rows.
isz = min(vsz, m - ii + 1)
!$acc loop independent private(tmp)
do i = ii, ii + isz - 1
! NOTE(review): the accumulator is single-precision complex but is
! initialised from a psb_dpk_ real literal; the value is still zero.
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = irp(i), irp(i + 1) - 1
tmp = tmp + val(j) * x(ja(j))
end do
y(i) = alpha * tmp + beta * y(i)
end do
end do
end subroutine inner_spmv
end subroutine psb_c_oacc_csr_vect_mv
end submodule psb_c_oacc_csr_vect_mv_impl

@ -0,0 +1,35 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_allocate_mnnz_impl
  use psb_base_mod
contains
  !
  ! Allocate an m x n matrix through the parent ELL allocator, then call
  ! sync_dev_space() and set_host() on the object.
  !   m, n : requested dimensions, forwarded to the parent allocator
  !   a    : matrix being (re)allocated
  !   nz   : optional size argument forwarded to the parent allocator;
  !          10 is used when it is absent
  ! The parent allocator takes no status argument here, so info keeps the
  ! value psb_success_ assigned at entry.
  !
  module subroutine psb_c_oacc_ell_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in) :: m, n
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional :: nz

    logical, parameter :: debug=.false.
    character(len=20) :: name='allocate_mnnz'
    integer(psb_ipk_) :: err_act, info, nz_

    call psb_erractionsave(err_act)
    info = psb_success_

    ! Start from the default and override it when the caller supplied nz.
    nz_ = 10
    if (present(nz)) nz_ = nz

    call a%psb_c_ell_sparse_mat%allocate(m, n, nz_)
    call a%sync_dev_space()
    call a%set_host()

    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_c_oacc_ell_allocate_mnnz
end submodule psb_c_oacc_ell_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_coo_impl
  use psb_base_mod
contains
  !
  ! Copy a COO matrix b into a: release the device space of a, let the
  ! parent ELL type perform the conversion, then call sync_dev_space(),
  ! set_host() and sync() on the result.
  ! info is psb_success_ on success; any non-zero status returned by the
  ! parent conversion is reported as psb_err_alloc_dealloc_.
  !
  module subroutine psb_c_oacc_ell_cp_from_coo(a, b, info)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_c_coo_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_c_ell_sparse_mat%cp_from_coo(b, info)

    if (info /= 0) then
      ! Conversion failed: nothing is rebuilt on the device side.
      info = psb_err_alloc_dealloc_
    else
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    end if
  end subroutine psb_c_oacc_ell_cp_from_coo
end submodule psb_c_oacc_ell_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Copy a matrix b of arbitrary storage format into a.
  ! A COO source is delegated to a%cp_from_coo; every other format goes
  ! through the parent ELL conversion, after which sync_dev_space(),
  ! set_host() and sync() are called. On a non-zero status from the
  ! parent conversion the routine returns with that status unchanged.
  !
  module subroutine psb_c_oacc_ell_cp_from_fmt(a, b, info)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_c_base_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type(b)
    type is (psb_c_coo_sparse_mat)
      call a%cp_from_coo(b, info)
    class default
      call a%free_dev_space()
      call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_c_oacc_ell_cp_from_fmt
end submodule psb_c_oacc_ell_cp_from_fmt_impl

@ -0,0 +1,85 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_inner_vect_sv_impl
  use psb_base_mod
contains
  !
  ! Triangular solve  y = alpha * op(A)^{-1} x + beta * y  on vector
  ! objects, for an OpenACC ELL matrix.
  !
  ! The solve is always delegated to the parent ELL implementation on
  ! the host.  The previous version had an OpenACC loop for the
  ! non-transposed, beta == 0 case, but that loop overwrote y(i) at
  ! every inner iteration, ran its column index up to a%nzt and did
  ! not perform a solve at all; the host path already used in this
  ! routine for every other case is the reference behaviour.
  !
  !   alpha, beta : scalar coefficients
  !   a           : assembled matrix
  !   x           : right-hand side vector
  !   y           : on exit holds the result and is flagged as host data
  !   info        : psb_success_, psb_err_invalid_mat_state_ if a is
  !                 not assembled, psb_err_from_subroutine_ if the
  !                 parent solve fails
  !   trans       : optional, passed unchanged to the parent solve
  !
  module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
    complex(psb_spk_), intent(in) :: alpha, beta
    class(psb_c_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans

    integer(psb_ipk_) :: err_act
    ! len=* keeps the whole routine name (len=20 used to truncate it).
    character(len=*), parameter :: name = 'c_oacc_ell_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (.not.a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      goto 9999
    endif

    ! The host solve reads the host copy of the matrix; refresh it if
    ! the device copy is the current one (e.g. after a device scal).
    if (a%is_dev()) call a%sync()
    call x%sync()
    call y%sync()
    call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
    call y%set_host()

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'ell_vect_sv')
      goto 9999
    endif

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return
  end subroutine psb_c_oacc_ell_inner_vect_sv
end submodule psb_c_oacc_ell_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl
  use psb_base_mod
contains
  !
  ! Make b a freshly allocated object of dynamic type
  ! psb_c_oacc_ell_sparse_mat.  If b is already allocated it is freed
  ! and deallocated first.  The contents of a are not used.
  !
  !   a    : only determines which mold routine gets invoked
  !   b    : on exit, an object of type psb_c_oacc_ell_sparse_mat
  !   info : 0 on success, psb_err_alloc_dealloc_ if the deallocation
  !          or the allocation fails
  !
  module subroutine psb_c_oacc_ell_mold(a, b, info)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
    class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out) :: info
    logical, parameter :: debug = .false.
    character(len=20) :: name = 'ell_mold'
    integer(psb_ipk_) :: err_act

    call psb_get_erraction(err_act)
    info = 0

    ! Dispose of whatever b currently holds.
    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    ! Allocate with the new dynamic type only if nothing failed so far.
    if (info == 0) then
      allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info)
    end if

    if (info == psb_success_) return

    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
    return
  end subroutine psb_c_oacc_ell_mold
end submodule psb_c_oacc_ell_mold_impl

@ -0,0 +1,27 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl
  use psb_base_mod
contains
  !
  ! Move the COO matrix b into a.  The device space of a is released
  ! first, the parent ELL type performs the move, and on success the
  ! device space is set up again and a%sync() is called.
  !
  !   a    : destination matrix
  !   b    : source COO matrix, handed to the parent mv_from_coo
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ whenever
  !          the parent move reports a failure
  !
  module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_c_coo_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)

    if (info == 0) then
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      ! The specific code returned by the parent is replaced.
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_c_oacc_ell_mv_from_coo
end submodule psb_c_oacc_ell_mv_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Move a matrix b of arbitrary storage format into a.
  ! A COO source is handed to a%mv_from_coo; every other format goes
  ! through the parent ELL mv_from_fmt, followed by the device refresh
  ! (sync_dev_space / set_host / sync) when the move succeeds.
  !
  !   a    : destination matrix
  !   b    : source matrix, handed to the selected move routine
  !   info : return code of the move that was used
  !
  module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_c_base_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type (src => b)
    type is (psb_c_coo_sparse_mat)
      call a%mv_from_coo(src, info)

    class default
      call a%free_dev_space()
      call a%psb_c_ell_sparse_mat%mv_from_fmt(src, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_c_oacc_ell_mv_from_fmt
end submodule psb_c_oacc_ell_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl
  use psb_base_mod
contains
  !
  ! Reallocate the storage of a through the parent ELL type, then call
  ! sync_dev_space() and flag the matrix through set_host().
  !
  !   nz : value forwarded to the parent reallocate
  !   a  : matrix being reallocated
  !
  module subroutine psb_c_oacc_ell_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in) :: nz
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    logical, parameter :: debug=.false.
    character(len=20) :: name='c_oacc_ell_reallocate_nz'
    integer(psb_ipk_) :: info, err_act

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_c_ell_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    ! None of the calls above updates info; the error exit is kept for
    ! uniformity with the other routines of this family.
    if (info /= 0) then
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_ell_reallocate_nz
end submodule psb_c_oacc_ell_reallocate_nz_impl

@ -0,0 +1,58 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl
  use psb_base_mod
contains
  !
  ! Scale the matrix by the diagonal matrix whose entries are in d,
  ! operating on the device copy of the coefficients.
  !
  !   side = 'L' (default) : row scaling,    val(i,j) *= d(i)
  !   side = 'R'           : column scaling, val(i,j) *= d(ja(i,j))
  !   any other value      : coefficients are left unchanged, as before
  !
  !   d    : scaling factors
  !   a    : matrix to scale; flagged through set_dev() on exit
  !   info : always psb_success_
  !   side : optional, upper or lower case
  !
  ! Changes with respect to the previous version:
  !  * the second index of val/ja now runs over size(a%val,2) instead
  !    of a%nzt, matching the bound the matrix-vector product of this
  !    type takes from size(a%ja,2);
  !  * when side is absent the scaling is applied from the left; the
  !    old code multiplied by d(j), j being the slot index inside a row
  !    NOTE(review): 'L' as the default is assumed to match the host
  !    scal routines - confirm against the parent ELL implementation;
  !  * d is listed in copyin instead of present, so a d that lives only
  !    in host memory does not abort the kernel launch;
  !  * slots with ja(i,j) <= 0 are skipped in the 'R' case, the same
  !    guard used by the matrix-vector product of this type.
  !
  module subroutine psb_c_oacc_ell_scal(d, a, info, side)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    complex(psb_spk_), intent(in) :: d(:)
    integer(psb_ipk_), intent(out) :: info
    character, intent(in), optional :: side

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, m, nc
    character :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    if (a%is_host()) call a%sync()

    m  = a%get_nrows()
    nc = size(a%val, 2)

    select case (side_)
    case ('L')
      !$acc parallel loop collapse(2) present(a) copyin(d)
      do j = 1, nc
        do i = 1, m
          a%val(i, j) = a%val(i, j) * d(i)
        end do
      end do

    case ('R')
      !$acc parallel loop collapse(2) present(a) copyin(d)
      do j = 1, nc
        do i = 1, m
          if (a%ja(i, j) > 0) then
            a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
          end if
        end do
      end do

    case default
      ! Unrecognised side: nothing is scaled.
      continue
    end select

    call a%set_dev()
    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_ell_scal
end submodule psb_c_oacc_ell_scal_impl

@ -0,0 +1,39 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl
  use psb_base_mod
contains
  !
  ! Multiply every stored coefficient of a by the scalar d, operating
  ! on the device copy of the coefficients.
  !
  !   d    : scaling factor
  !   a    : matrix to scale; flagged through set_dev() on exit
  !   info : always psb_success_
  !
  ! The second index of val now runs over size(a%val,2).  The previous
  ! bound was a%nzt, which is not the second extent of val (the
  ! matrix-vector product of this type takes that bound from
  ! size(a%ja,2)).
  !
  module subroutine psb_c_oacc_ell_scals(d, a, info)
    implicit none
    class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
    complex(psb_spk_), intent(in) :: d
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, m, nc

    info = psb_success_
    call psb_erractionsave(err_act)

    if (a%is_host()) call a%sync()

    m  = a%get_nrows()
    nc = size(a%val, 2)

    ! First index innermost: val is stored column by column.
    !$acc parallel loop collapse(2) present(a)
    do j = 1, nc
      do i = 1, m
        a%val(i, j) = a%val(i, j) * d
      end do
    end do

    call a%set_dev()
    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_ell_scals
end submodule psb_c_oacc_ell_scals_impl

@ -0,0 +1,90 @@
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl
  use psb_base_mod
contains
  !
  ! Matrix-vector product  y = alpha * op(A) * x + beta * y  on vector
  ! objects, for an OpenACC ELL matrix.
  !
  ! The OpenACC kernel is used only for the non-transposed product with
  ! both x and y of class psb_c_vect_oacc; every other combination is
  ! computed by the parent ELL spmm on the host arrays.
  !
  !   alpha, beta : scalar coefficients
  !   a           : matrix
  !   x           : input vector
  !   y           : input/output vector; flagged set_dev() after the
  !                 kernel, set_host() after the host product
  !   info        : psb_success_, or psb_err_invalid_mat_state_ when x
  !                 or y is shorter than the matrix dimensions require
  !   trans       : optional; 'T' or 'C' (any case) selects the host path
  !
  ! Changes with respect to the previous version: the accumulator of
  ! the kernel is initialised with czero rather than a double precision
  ! real literal, the dummies of the internal kernel carry explicit
  ! intents, and unused locals are gone.  Results are unchanged.
  !
  module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
    implicit none
    complex(psb_spk_), intent(in) :: alpha, beta
    class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
    class(psb_c_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans

    integer(psb_ipk_) :: m, n, nc
    character :: trans_
    logical :: device_done, tra

    info = psb_success_
    m  = a%get_nrows()
    n  = a%get_ncols()
    nc = size(a%ja,2)

    if ((n > size(x%v)) .or. (m > size(y%v))) then
      write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
      info = psb_err_invalid_mat_state_
      return
    end if

    device_done = .false.
    if (present(trans)) then
      trans_ = trans
    else
      trans_ = 'N'
    end if
    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')

    if (.not.tra) then
      select type(xx => x)
      class is (psb_c_vect_oacc)
        select type (yy => y)
        class is (psb_c_vect_oacc)
          ! Bring matrix and vectors up to date before the kernel runs.
          if (a%is_host()) call a%sync()
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
          call y%set_dev()
          device_done = .true.
        end select
      end select
    end if

    if (.not.device_done) then
      ! Host fallback: transposed product or non-OpenACC vectors.
      if (x%is_dev()) call x%sync()
      if (y%is_dev()) call y%sync()
      call a%psb_c_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
      call y%set_host()
    end if

  contains

    ! OpenACC kernel: y(1:m) = alpha * A * x + beta * y(1:m).
    ! Rows are processed in strips of vsz; slots with ja(i,j) <= 0 do
    ! not contribute to the row sum.  n is accepted but not used.
    subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
      implicit none
      integer(psb_ipk_), intent(in) :: m, n, nc
      complex(psb_spk_), intent(in) :: alpha, beta
      complex(psb_spk_), intent(in) :: val(:,:), x(:)
      complex(psb_spk_), intent(inout) :: y(:)
      integer(psb_ipk_), intent(in) :: ja(:,:)
      integer(psb_ipk_), intent(out) :: info

      integer(psb_ipk_) :: i, j, ii, isz
      complex(psb_spk_) :: tmp
      integer(psb_ipk_), parameter :: vsz = 256

      info = 0
      !$acc parallel loop vector_length(vsz) private(isz)
      do ii = 1, m, vsz
        isz = min(vsz, m - ii + 1)
        !$acc loop independent private(tmp)
        do i = ii, ii + isz - 1
          tmp = czero
          !$acc loop seq
          do j = 1, nc
            if (ja(i,j) > 0) then
              tmp = tmp + val(i,j) * x(ja(i,j))
            end if
          end do
          y(i) = alpha * tmp + beta * y(i)
        end do
      end do
    end subroutine inner_spmv
  end subroutine psb_c_oacc_ell_vect_mv
end submodule psb_c_oacc_ell_vect_mv_impl

@ -0,0 +1,36 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl
  use psb_base_mod
contains
  !
  ! Allocate an OpenACC HLL matrix: the parent HLL type performs the
  ! allocation, after which sync_dev_space() is invoked and the matrix
  ! is flagged through set_host().
  !
  !   m, n : sizes forwarded to the parent allocate
  !   a    : matrix being allocated
  !   nz   : optional third argument for the parent allocate;
  !          10 is used when it is not supplied
  !
  module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in) :: m, n
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional :: nz
    logical, parameter :: debug=.false.
    character(len=20) :: name='allocate_mnnz'
    integer(psb_ipk_) :: info, err_act, nz_
    integer(psb_ipk_) :: hksz, nhacks

    call psb_erractionsave(err_act)
    info = psb_success_

    ! Start from the default and override it if the caller gave a value.
    nz_ = 10
    if (present(nz)) nz_ = nz

    call a%psb_c_hll_sparse_mat%allocate(m, n, nz_)
    call a%sync_dev_space()
    call a%set_host()

    ! None of the calls above updates info; the error exit is kept for
    ! uniformity with the other routines of this family.
    if (info /= 0) then
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_hll_allocate_mnnz
end submodule psb_c_oacc_hll_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl
  use psb_base_mod
contains
  !
  ! Copy the COO matrix b into a.  The device space of a is released
  ! first, the parent HLL type performs the conversion, and on success
  ! the device space is set up again and a%sync() is called.
  !
  !   a    : destination matrix
  !   b    : source COO matrix (left untouched)
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ whenever
  !          the parent conversion reports a failure
  !
  module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_c_coo_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_c_hll_sparse_mat%cp_from_coo(b, info)

    if (info == 0) then
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      ! The specific code returned by the parent is replaced.
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_c_oacc_hll_cp_from_coo
end submodule psb_c_oacc_hll_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Copy a matrix b of arbitrary storage format into a.
  ! A COO source is handed to a%cp_from_coo; every other format goes
  ! through the parent HLL cp_from_fmt, followed by the device refresh
  ! (sync_dev_space / set_host / sync) when the conversion succeeds.
  !
  !   a    : destination matrix
  !   b    : source matrix (left untouched)
  !   info : return code of the conversion that was used
  !
  module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_c_base_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type (src => b)
    type is (psb_c_coo_sparse_mat)
      call a%cp_from_coo(src, info)

    class default
      call a%free_dev_space()
      call a%psb_c_hll_sparse_mat%cp_from_fmt(src, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_c_oacc_hll_cp_from_fmt
end submodule psb_c_oacc_hll_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl
  use psb_base_mod
contains
  !
  ! Triangular solve  y = alpha * op(A)^{-1} x + beta * y  on vector
  ! objects, for an OpenACC HLL matrix.
  !
  ! The solve is always delegated to the parent HLL implementation on
  ! the host.  The previous version had an OpenACC loop for the
  ! non-transposed, beta == 0 case, but that loop scattered single
  ! products val(j)*x(ja(j)) into y(irn(j)) from concurrent iterations
  ! and did not perform a solve; the host path already used in this
  ! routine for every other case is the reference behaviour.
  !
  !   alpha, beta : scalar coefficients
  !   a           : assembled matrix
  !   x           : right-hand side vector
  !   y           : on exit holds the result and is flagged as host data
  !   info        : psb_success_, psb_err_invalid_mat_state_ if a is
  !                 not assembled, psb_err_from_subroutine_ if the
  !                 parent solve fails
  !   trans       : optional, passed unchanged to the parent solve
  !
  module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
    complex(psb_spk_), intent(in) :: alpha, beta
    class(psb_c_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans

    integer(psb_ipk_) :: err_act
    ! len=* keeps the whole routine name (len=20 used to truncate it).
    character(len=*), parameter :: name = 'c_oacc_hll_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (.not.a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      goto 9999
    endif

    ! The host solve reads the host copy of the matrix; refresh it if
    ! the device copy is the current one (e.g. after a device scal).
    if (a%is_dev()) call a%sync()
    call x%sync()
    call y%sync()
    call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
    call y%set_host()

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'hll_vect_sv')
      goto 9999
    endif

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return
  end subroutine psb_c_oacc_hll_inner_vect_sv
end submodule psb_c_oacc_hll_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl
  use psb_base_mod
contains
  !
  ! Make b a freshly allocated object of dynamic type
  ! psb_c_oacc_hll_sparse_mat.  If b is already allocated it is freed
  ! and deallocated first.  The contents of a are not used.
  !
  !   a    : only determines which mold routine gets invoked
  !   b    : on exit, an object of type psb_c_oacc_hll_sparse_mat
  !   info : 0 on success, psb_err_alloc_dealloc_ if the deallocation
  !          or the allocation fails
  !
  module subroutine psb_c_oacc_hll_mold(a, b, info)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
    class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out) :: info
    logical, parameter :: debug = .false.
    character(len=20) :: name = 'hll_mold'
    integer(psb_ipk_) :: err_act

    call psb_get_erraction(err_act)
    info = 0

    ! Dispose of whatever b currently holds.
    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    ! Allocate with the new dynamic type only if nothing failed so far.
    if (info == 0) then
      allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info)
    end if

    if (info == psb_success_) return

    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
    return
  end subroutine psb_c_oacc_hll_mold
end submodule psb_c_oacc_hll_mold_impl

@ -0,0 +1,27 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl
  use psb_base_mod
contains
  !
  ! Move the COO matrix b into a.  The device space of a is released
  ! first, the parent HLL type performs the move, and on success the
  ! device space is set up again and a%sync() is called.
  !
  !   a    : destination matrix
  !   b    : source COO matrix, handed to the parent mv_from_coo
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ whenever
  !          the parent move reports a failure
  !
  module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_c_coo_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)

    if (info == 0) then
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      ! The specific code returned by the parent is replaced.
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_c_oacc_hll_mv_from_coo
end submodule psb_c_oacc_hll_mv_from_coo_impl

@ -0,0 +1,25 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Move a matrix b of arbitrary storage format into a.
  ! A COO source is handed to a%mv_from_coo; every other format goes
  ! through the parent HLL mv_from_fmt, followed by the device refresh
  ! (sync_dev_space / set_host / sync) when the move succeeds.
  !
  !   a    : destination matrix
  !   b    : source matrix, handed to the selected move routine
  !   info : return code of the move that was used
  !
  module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_c_base_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type (src => b)
    type is (psb_c_coo_sparse_mat)
      call a%mv_from_coo(src, info)

    class default
      call a%free_dev_space()
      call a%psb_c_hll_sparse_mat%mv_from_fmt(src, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_c_oacc_hll_mv_from_fmt
end submodule psb_c_oacc_hll_mv_from_fmt_impl

@ -0,0 +1,29 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl
  use psb_base_mod
contains
  !
  ! Reallocate the storage of a through the parent HLL type, then call
  ! sync_dev_space() and flag the matrix through set_host().
  !
  !   nz : value forwarded to the parent reallocate
  !   a  : matrix being reallocated
  !
  module subroutine psb_c_oacc_hll_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in) :: nz
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    logical, parameter :: debug=.false.
    character(len=20) :: name='c_oacc_hll_reallocate_nz'
    integer(psb_ipk_) :: info, err_act
    integer(psb_ipk_) :: hksz, nhacks

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_c_hll_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    ! None of the calls above updates info; the error exit is kept for
    ! uniformity with the other routines of this family.
    if (info /= 0) then
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_hll_reallocate_nz
end submodule psb_c_oacc_hll_reallocate_nz_impl

@ -0,0 +1,62 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl
  use psb_base_mod
contains
  !
  ! Scale the matrix by the diagonal matrix whose entries are in d,
  ! operating on the device copy of the coefficients.
  !
  !   side = 'L' (default) : row scaling,    val(j) *= d(row of j)
  !   side = 'R'           : column scaling, val(j) *= d(ja(j))
  !   any other value      : coefficients are left unchanged, as before
  !
  !   d    : scaling factors
  !   a    : matrix to scale; flagged through set_dev() on exit
  !   info : always psb_success_
  !   side : optional, upper or lower case
  !
  ! Storage layout relied upon (the same the matrix-vector product of
  ! this type uses): the entries of chunk i occupy positions
  ! hkoffs(i)+1 .. hkoffs(i+1); inside a chunk consecutive positions
  ! belong to consecutive rows and the stride between two entries of
  ! the same row is hksz.
  !
  ! Changes with respect to the previous version:
  !  * positions run over hkoffs(i)+1 .. hkoffs(i+1); the old bounds
  !    hkoffs(i) .. hkoffs(i+1)-1 were shifted by one;
  !  * the row of a position is derived from its offset modulo hksz;
  !    the old formula divided the offset by a%nzt;
  !  * rows beyond get_nrows() (tail of the last chunk) are skipped;
  !  * when side is absent the scaling is applied from the left; the
  !    old code multiplied by d(offset inside the chunk)
  !    NOTE(review): 'L' as the default is assumed to match the host
  !    scal routines - confirm against the parent HLL implementation;
  !  * d is listed in copyin instead of present, so a d that lives only
  !    in host memory does not abort the kernel launch.
  !
  module subroutine psb_c_oacc_hll_scal(d, a, info, side)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    complex(psb_spk_), intent(in) :: d(:)
    integer(psb_ipk_), intent(out) :: info
    character, intent(in), optional :: side

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, k, m, hksz, nhacks
    character :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    if (a%is_host()) call a%sync()

    m      = a%get_nrows()
    hksz   = a%hksz
    nhacks = (m + hksz - 1) / hksz

    select case (side_)
    case ('L')
      !$acc parallel loop present(a) copyin(d) private(k)
      do i = 1, nhacks
        do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
          ! Global row owning position j.
          k = (i - 1) * hksz + mod(j - a%hkoffs(i) - 1, hksz) + 1
          if (k <= m) a%val(j) = a%val(j) * d(k)
        end do
      end do

    case ('R')
      !$acc parallel loop present(a) copyin(d)
      do i = 1, nhacks
        do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
          a%val(j) = a%val(j) * d(a%ja(j))
        end do
      end do

    case default
      ! Unrecognised side: nothing is scaled.
      continue
    end select

    call a%set_dev()
    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_hll_scal
end submodule psb_c_oacc_hll_scal_impl

@ -0,0 +1,40 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl
  use psb_base_mod
contains
  !
  ! Multiply every stored coefficient of a by the scalar d, operating
  ! on the device copy of the coefficients.
  !
  !   d    : scaling factor
  !   a    : matrix to scale; flagged through set_dev() on exit
  !   info : always psb_success_
  !
  ! The entries of chunk i occupy positions hkoffs(i)+1 .. hkoffs(i+1),
  ! which is how the matrix-vector product of this type addresses them.
  ! The previous bounds, hkoffs(i) .. hkoffs(i+1)-1, were shifted by
  ! one: they touched the position before the first entry and left the
  ! last entry unscaled.
  !
  module subroutine psb_c_oacc_hll_scals(d, a, info)
    implicit none
    class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
    complex(psb_spk_), intent(in) :: d
    integer(psb_ipk_), intent(out) :: info

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, hksz, nhacks

    info = psb_success_
    call psb_erractionsave(err_act)

    if (a%is_host()) call a%sync()

    hksz   = a%hksz
    nhacks = (a%get_nrows() + hksz - 1) / hksz

    !$acc parallel loop present(a)
    do i = 1, nhacks
      do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
        a%val(j) = a%val(j) * d
      end do
    end do

    call a%set_dev()
    call psb_erractionrestore(err_act)
    return
  end subroutine psb_c_oacc_hll_scals
end submodule psb_c_oacc_hll_scals_impl

@ -0,0 +1,90 @@
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl
  use psb_base_mod
contains
  !
  ! Matrix-vector product  y = alpha * op(A) * x + beta * y  on vector
  ! objects, for an OpenACC HLL matrix.
  !
  ! The OpenACC kernel is used only for the non-transposed product with
  ! both x and y of class psb_c_vect_oacc; every other combination is
  ! computed by the parent HLL spmm on the host arrays.
  !
  !   alpha, beta : scalar coefficients
  !   a           : matrix
  !   x           : input vector
  !   y           : input/output vector; flagged set_dev() after the
  !                 kernel, set_host() after the host product
  !   info        : psb_success_, or psb_err_invalid_mat_state_ when x
  !                 or y is shorter than the matrix dimensions require
  !   trans       : optional; 'T' or 'C' (any case) selects the host path
  !
  module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
    implicit none
    complex(psb_spk_), intent(in) :: alpha, beta
    class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
    class(psb_c_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans

    logical :: tra, device_done
    character :: trans_
    integer(psb_ipk_) :: m, n, nhacks, hksz

    info   = psb_success_
    m      = a%get_nrows()
    n      = a%get_ncols()
    nhacks = size(a%hkoffs) - 1
    hksz   = a%hksz

    if ((n > size(x%v)) .or. (m > size(y%v))) then
      write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
      info = psb_err_invalid_mat_state_
      return
    end if

    ! Default is the non-transposed product.
    trans_ = 'N'
    if (present(trans)) trans_ = trans
    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')

    device_done = .false.
    if (.not.tra) then
      select type(xx => x)
      class is (psb_c_vect_oacc)
        select type (yy => y)
        class is (psb_c_vect_oacc)
          ! Bring matrix and vectors up to date before the kernel runs.
          if (a%is_host()) call a%sync()
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
          call y%set_dev()
          device_done = .true.
        end select
      end select
    end if

    if (.not.device_done) then
      ! Host fallback: transposed product or non-OpenACC vectors.
      if (x%is_dev()) call x%sync()
      if (y%is_dev()) call y%sync()
      call a%psb_c_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
      call y%set_host()
    end if

  contains

    ! OpenACC kernel: y(1:m) = alpha * A * x + beta * y(1:m).
    ! Chunk i covers rows (i-1)*hksz+1 .. min(i*hksz, m); its entries
    ! start right after position hkoffs(i) and the entries of one row
    ! are hksz positions apart.
    subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
      implicit none
      integer(psb_ipk_) :: m, nhacks, hksz
      complex(psb_spk_), intent(in) :: alpha, beta
      complex(psb_spk_) :: val(:), x(:), y(:)
      integer(psb_ipk_) :: ja(:), hkoffs(:)
      integer(psb_ipk_), intent(out) :: info

      integer(psb_ipk_) :: ih, lr, jc, pos
      integer(psb_ipk_) :: chunk_len, chunk_cols, row0, chunk_rows
      complex(psb_spk_) :: acc

      info = 0
      !$acc parallel loop private(chunk_cols, chunk_len, row0, chunk_rows)
      do ih = 1, nhacks
        chunk_len  = hkoffs(ih + 1) - hkoffs(ih)
        chunk_cols = chunk_len/hksz
        row0       = (ih-1)*hksz
        chunk_rows = min(hksz, m-row0)
        !$acc loop independent private(acc, lr, pos)
        do lr = 1, chunk_rows
          acc = czero
          !$acc loop seq
          do jc = 1, chunk_cols
            pos = hkoffs(ih) + lr + (jc - 1)*hksz
            acc = acc + val(pos) * x(ja(pos))
          end do
          y(lr+row0) = alpha * acc + beta * y(lr+row0)
        end do
      end do
    end subroutine inner_spmv
  end subroutine psb_c_oacc_hll_vect_mv
end submodule psb_c_oacc_hll_vect_mv_impl

@ -0,0 +1,46 @@
!
! Entry-wise product  y(i) = y(i) * x(i)  for i = 1 .. n, where n is
! the smaller of the row counts of x and y.
!
! If x is of class psb_c_vect_oacc the product is computed by an
! OpenACC kernel and y is flagged through set_dev(); otherwise it is
! computed on the host arrays and y is flagged through set_host().
!
!   x    : first operand (synchronised if needed, values not changed)
!   y    : second operand, overwritten with the product
!   info : always 0
!
! Changes with respect to the previous version: the initialisation of
! info and n was written twice, a commented-out copy of the kernel was
! left in the body, and the kernel declared its read-only operand as
! intent(inout).
!
subroutine psb_c_oacc_mlt_v(x, y, info)
  use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v
  implicit none
  class(psb_c_base_vect_type), intent(inout) :: x
  class(psb_c_vect_oacc), intent(inout) :: y
  integer(psb_ipk_), intent(out) :: info

  integer(psb_ipk_) :: i, n

  info = 0
  n = min(x%get_nrows(), y%get_nrows())

  select type(xx => x)
  class is (psb_c_vect_oacc)
    ! Both operands must be current on the device side.
    if (y%is_host()) call y%sync()
    if (xx%is_host()) call xx%sync()
    call c_inner_oacc_mlt_v(n, xx%v, y%v)
    call y%set_dev()

  class default
    ! Both operands must be current on the host side.
    if (xx%is_dev()) call xx%sync()
    if (y%is_dev()) call y%sync()
    do i = 1, n
      y%v(i) = y%v(i) * xx%v(i)
    end do
    call y%set_host()
  end select

contains

  ! OpenACC kernel: y(1:n) = x(1:n) * y(1:n); x and y must already be
  ! present on the device.
  subroutine c_inner_oacc_mlt_v(n, x, y)
    implicit none
    integer(psb_ipk_), intent(in) :: n
    complex(psb_spk_), intent(in) :: x(:)
    complex(psb_spk_), intent(inout) :: y(:)
    integer(psb_ipk_) :: i

    !$acc parallel loop present(x,y)
    do i = 1, n
      y(i) = (x(i)) * (y(i))
    end do
  end subroutine c_inner_oacc_mlt_v
end subroutine psb_c_oacc_mlt_v

@ -0,0 +1,91 @@
!
! Entry-wise product with accumulation,
!   z(i) = alpha * op(x(i)) * op(y(i)) + beta * z(i),  i = 1 .. n,
! where n is the smallest of the row counts of x, y and z and op() is
! the complex conjugate when the matching conjg flag is 'C' (any case)
! and the identity otherwise.
!
! If both x and y are of class psb_c_vect_oacc the product is computed
! by an OpenACC kernel and z is flagged through set_dev(); otherwise it
! is computed on the host arrays and z is flagged through set_host().
! z is synchronised beforehand only when beta is not zero.
!
!   alpha, beta    : scalar coefficients
!   x, y           : operands (synchronised if needed)
!   z              : result vector
!   info           : always 0
!   conjgx, conjgy : optional conjugation flags
!
! Change with respect to the previous version: the internal kernel now
! assigns its intent(out) info.  It used to return without defining
! it, which left the caller's info formally undefined after the device
! path.  The read-only operands of the kernel are now intent(in).
!
subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
  use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2
  use psb_string_mod
  implicit none
  complex(psb_spk_), intent(in) :: alpha, beta
  class(psb_c_base_vect_type), intent(inout) :: x
  class(psb_c_base_vect_type), intent(inout) :: y
  class(psb_c_vect_oacc), intent(inout) :: z
  integer(psb_ipk_), intent(out) :: info
  character(len=1), intent(in), optional :: conjgx, conjgy

  integer(psb_ipk_) :: i, n
  logical :: conjgx_, conjgy_, device_done

  conjgx_ = .false.
  conjgy_ = .false.
  device_done = .false.
  if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
  if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')

  n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
  info = 0

  select type(xx => x)
  class is (psb_c_vect_oacc)
    select type (yy => y)
    class is (psb_c_vect_oacc)
      if (xx%is_host()) call xx%sync()
      if (yy%is_host()) call yy%sync()
      ! The old contents of z matter only when beta is not zero.
      if ((beta /= czero) .and. (z%is_host())) call z%sync()
      call c_inner_oacc_mlt_v_2(n, alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
      call z%set_dev()
      device_done = .true.
    end select
  end select

  if (.not.device_done) then
    ! Host fallback: at least one operand is not an OpenACC vector.
    if (x%is_dev()) call x%sync()
    if (y%is_dev()) call y%sync()
    if ((beta /= czero) .and. (z%is_dev())) call z%sync()
    if (conjgx_.and.conjgy_) then
      do i = 1, n
        z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
      end do
    else if (conjgx_.and.(.not.conjgy_)) then
      do i = 1, n
        z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
      end do
    else if ((.not.conjgx_).and.(conjgy_)) then
      do i = 1, n
        z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
      end do
    else
      do i = 1, n
        z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
      end do
    end if
    call z%set_host()
  end if

contains

  ! OpenACC kernel for the four conjugation combinations; x, y and z
  ! must already be present on the device.  info is always set to 0.
  subroutine c_inner_oacc_mlt_v_2(n, alpha, x, y, beta, z, info, conjgx, conjgy)
    implicit none
    integer(psb_ipk_), intent(in) :: n
    complex(psb_spk_), intent(in) :: alpha, beta
    complex(psb_spk_), intent(in) :: x(:), y(:)
    complex(psb_spk_), intent(inout) :: z(:)
    integer(psb_ipk_), intent(out) :: info
    logical, intent(in) :: conjgx, conjgy
    integer(psb_ipk_) :: i

    info = 0
    if (conjgx.and.conjgy) then
      !$acc parallel loop present(x,y,z)
      do i = 1, n
        z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i)
      end do
    else if (conjgx.and.(.not.conjgy)) then
      !$acc parallel loop present(x,y,z)
      do i = 1, n
        z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i)
      end do
    else if ((.not.conjgx).and.(conjgy)) then
      !$acc parallel loop present(x,y,z)
      do i = 1, n
        z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i)
      end do
    else
      !$acc parallel loop present(x,y,z)
      do i = 1, n
        z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
      end do
    end if
  end subroutine c_inner_oacc_mlt_v_2
end subroutine psb_c_oacc_mlt_v_2

@ -0,0 +1,29 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_allocate_mnnz_impl
  use psb_base_mod
contains
  !
  ! Allocate an OpenACC CSR matrix: the parent CSR type performs the
  ! allocation, after which the matrix is flagged through set_host()
  ! and sync_dev_space() is invoked.
  !
  !   m, n : sizes forwarded to the parent allocate
  !   a    : matrix being allocated
  !   nz   : optional, forwarded as it is (present or not) to the
  !          parent allocate
  !
  module subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in) :: m, n
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional :: nz
    logical, parameter :: debug=.false.
    character(len=20) :: name='allocate_mnz'
    integer(psb_ipk_) :: info, err_act, nz_

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_d_csr_sparse_mat%allocate(m, n, nz)
    call a%set_host()
    call a%sync_dev_space()

    ! None of the calls above updates info; the error exit is kept for
    ! uniformity with the other routines of this family.
    if (info /= 0) then
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
    return
  end subroutine psb_d_oacc_csr_allocate_mnnz
end submodule psb_d_oacc_csr_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_coo_impl
  use psb_base_mod
contains
  !
  ! Copy the COO matrix b into a.  The device space of a is released
  ! first, the parent CSR type performs the conversion, and on success
  ! the device space is set up again and a%sync() is called.
  !
  !   a    : destination matrix
  !   b    : source COO matrix (left untouched)
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ whenever
  !          the parent conversion reports a failure
  !
  module subroutine psb_d_oacc_csr_cp_from_coo(a, b, info)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)

    if (info == 0) then
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      ! The specific code returned by the parent is replaced.
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_d_oacc_csr_cp_from_coo
end submodule psb_d_oacc_csr_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Copy a matrix b of arbitrary storage format into a.
  ! A COO source is handed to a%cp_from_coo; every other format goes
  ! through the parent CSR cp_from_fmt, followed by the device refresh
  ! (sync_dev_space / set_host / sync) when the conversion succeeds.
  !
  !   a    : destination matrix
  !   b    : source matrix (left untouched)
  !   info : return code of the conversion that was used
  !
  module subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type (src => b)
    type is (psb_d_coo_sparse_mat)
      call a%cp_from_coo(src, info)

    class default
      call a%free_dev_space()
      call a%psb_d_csr_sparse_mat%cp_from_fmt(src, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_d_oacc_csr_cp_from_fmt
end submodule psb_d_oacc_csr_cp_from_fmt_impl

@ -0,0 +1,83 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_inner_vect_sv_impl
  use psb_base_mod
contains
  !
  ! Triangular solve  y = alpha * op(A)^{-1} x + beta * y  on vector
  ! objects, for an OpenACC CSR matrix.
  !
  ! The solve is always delegated to the parent CSR implementation on
  ! the host.  The previous version had an OpenACC loop for the
  ! non-transposed, beta == 0 case, but that loop ran over the
  ! nonzeros, used the nonzero position as the index into y and did not
  ! perform a solve; the host path already used in this routine for
  ! every other case is the reference behaviour.
  !
  !   alpha, beta : scalar coefficients
  !   a           : assembled matrix
  !   x           : right-hand side vector
  !   y           : on exit holds the result and is flagged as host data
  !   info        : psb_success_, psb_err_invalid_mat_state_ if a is
  !                 not assembled, psb_err_from_subroutine_ if the
  !                 parent solve fails
  !   trans       : optional, passed unchanged to the parent solve
  !
  module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in) :: alpha, beta
    class(psb_d_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans

    integer(psb_ipk_) :: err_act
    ! len=* keeps the whole routine name (len=20 used to truncate it).
    character(len=*), parameter :: name = 'd_oacc_csr_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (.not.a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      goto 9999
    endif

    ! The host solve reads the host copy of the matrix; refresh it if
    ! the device copy is the current one (e.g. after a device scal).
    if (a%is_dev()) call a%sync()
    call x%sync()
    call y%sync()
    call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
    call y%set_host()

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
      goto 9999
    endif

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return
  end subroutine psb_d_oacc_csr_inner_vect_sv
end submodule psb_d_oacc_csr_inner_vect_sv_impl

@ -0,0 +1,35 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mold_impl
  use psb_base_mod
contains
  !
  ! Make b a freshly allocated object of dynamic type
  ! psb_d_oacc_csr_sparse_mat.  If b is already allocated it is freed
  ! and deallocated first.  The contents of a are not used.
  !
  !   a    : only determines which mold routine gets invoked
  !   b    : on exit, an object of type psb_d_oacc_csr_sparse_mat
  !   info : 0 on success, psb_err_alloc_dealloc_ if the deallocation
  !          or the allocation fails
  !
  module subroutine psb_d_oacc_csr_mold(a, b, info)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
    class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out) :: info
    logical, parameter :: debug=.false.
    character(len=20) :: name='csr_mold'
    integer(psb_ipk_) :: err_act

    call psb_get_erraction(err_act)
    info = 0

    ! Dispose of whatever b currently holds.
    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    ! Allocate with the new dynamic type only if nothing failed so far.
    if (info == 0) then
      allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info)
    end if

    if (info == psb_success_) return

    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
    return
  end subroutine psb_d_oacc_csr_mold
end submodule psb_d_oacc_csr_mold_impl

@ -0,0 +1,27 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_coo_impl
  use psb_base_mod
contains
  !
  ! Move the COO matrix b into a.  The device space of a is released
  ! first, the parent CSR type performs the move, and on success the
  ! device space is set up again and a%sync() is called.
  !
  !   a    : destination matrix
  !   b    : source COO matrix, handed to the parent mv_from_coo
  !   info : psb_success_ on success, psb_err_alloc_dealloc_ whenever
  !          the parent move reports a failure
  !
  module subroutine psb_d_oacc_csr_mv_from_coo(a, b, info)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)

    if (info == 0) then
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      ! The specific code returned by the parent is replaced.
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_d_oacc_csr_mv_from_coo
end submodule psb_d_oacc_csr_mv_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_fmt_impl
  use psb_base_mod
contains
  !
  ! Move a matrix b of arbitrary storage format into a.
  ! A COO source is handed to a%mv_from_coo; every other format goes
  ! through the parent CSR mv_from_fmt, followed by the device refresh
  ! (sync_dev_space / set_host / sync) when the move succeeds.
  !
  !   a    : destination matrix
  !   b    : source matrix, handed to the selected move routine
  !   info : return code of the move that was used
  !
  module subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type (src => b)
    type is (psb_d_coo_sparse_mat)
      call a%mv_from_coo(src, info)

    class default
      call a%free_dev_space()
      call a%psb_d_csr_sparse_mat%mv_from_fmt(src, info)
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_d_oacc_csr_mv_from_fmt
end submodule psb_d_oacc_csr_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_reallocate_nz_impl
  use psb_base_mod
contains
  !
  ! Reallocate the storage of a through the parent CSR type, then call
  ! sync_dev_space() and flag the matrix through set_host().
  !
  !   nz : value forwarded to the parent reallocate
  !   a  : matrix being reallocated
  !
  module subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in) :: nz
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    logical, parameter :: debug=.false.
    character(len=20) :: name='d_oacc_csr_reallocate_nz'
    integer(psb_ipk_) :: info, err_act

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_d_csr_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    ! None of the calls above updates info; the error exit is kept for
    ! uniformity with the other routines of this family.
    if (info /= 0) then
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
    return
  end subroutine psb_d_oacc_csr_reallocate_nz
end submodule psb_d_oacc_csr_reallocate_nz_impl

@ -0,0 +1,53 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scal_impl
  use psb_base_mod
contains

  ! Scale the OpenACC CSR matrix a by the diagonal matrix whose entries
  ! are stored in d, working on the device copy.
  !
  !   d    - scaling vector; indexed by row for a left scaling and by
  !          column index (a%ja) for a right scaling.
  !   a    - matrix to scale; synchronised first if the host copy is the
  !          current one, and flagged as device-current on exit.
  !   info - always psb_success_ (no failing operation is performed here).
  !   side - optional, 'L' (rows, the default) or 'R' (columns); the
  !          comparison is case-insensitive. Any other value leaves the
  !          values untouched.
  !
  ! Changes with respect to the previous version:
  !   * the 'R' branch iterated i over get_ncols() although i indexes the
  !     row pointer irp; it now iterates over get_nrows();
  !   * when side was absent, val(i) was multiplied by d(i) for every
  !     stored entry, i.e. d was indexed by nonzero position; an absent
  !     side is now treated as 'L';
  !   * the unreachable 9999 error block and unused locals were removed.
  !
  ! NOTE(review): present(d) requires the caller to have d resident on
  ! the device; this is unchanged from the original -- confirm callers.
  module subroutine psb_d_oacc_csr_scal(d, a, info, side)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                      :: d(:)
    integer(psb_ipk_), intent(out)                  :: info
    character, intent(in), optional                 :: side

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, nr
    character         :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    if (a%is_host()) call a%sync()
    nr = a%get_nrows()

    if (side_ == 'L') then
      ! Row scaling: every entry of row i is multiplied by d(i).
      !$acc parallel loop present(a, d)
      do i = 1, nr
        do j = a%irp(i), a%irp(i+1) - 1
          a%val(j) = a%val(j) * d(i)
        end do
      end do
    else if (side_ == 'R') then
      ! Column scaling: each entry is multiplied by d at its column index.
      !$acc parallel loop present(a, d)
      do i = 1, nr
        do j = a%irp(i), a%irp(i+1) - 1
          a%val(j) = a%val(j) * d(a%ja(j))
        end do
      end do
    end if

    call a%set_dev()
    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_csr_scal

end submodule psb_d_oacc_csr_scal_impl

@ -0,0 +1,34 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scals_impl
  use psb_base_mod
contains

  ! Multiply every stored value of the OpenACC CSR matrix a by the
  ! scalar d, working on the device copy.
  !
  !   d    - scalar factor.
  !   a    - matrix to scale; synchronised first if the host copy is the
  !          current one, and flagged as device-current on exit.
  !   info - always psb_success_.
  module subroutine psb_d_oacc_csr_scals(d, a, info)
    implicit none
    class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                      :: d
    integer(psb_ipk_), intent(out)                  :: info

    integer(psb_ipk_)  :: err_act
    character(len=20)  :: name='scal'
    logical, parameter :: debug=.false.
    integer(psb_ipk_)  :: k, nvals

    info = psb_success_
    call psb_erractionsave(err_act)

    if (a%is_host()) call a%sync()

    nvals = size(a%val)
    !$acc parallel loop present(a)
    do k = 1, nvals
      a%val(k) = a%val(k) * d
    end do

    call a%set_dev()
    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_csr_scals

end submodule psb_d_oacc_csr_scals_impl

@ -0,0 +1,86 @@
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_vect_mv_impl
use psb_base_mod
contains
! Sparse matrix-vector product y = alpha*op(A)*x + beta*y for an OpenACC
! CSR matrix.
!
!   alpha, beta - scalar coefficients.
!   a           - the matrix.
!   x, y        - input and input/output vectors.
!   info        - psb_success_, or psb_err_invalid_mat_state_ when x or y
!                 is shorter than the matrix dimensions require; otherwise
!                 whatever the kernel or the host spmm returns.
!   trans       - optional; 'T' or 'C' (any case) selects the transposed
!                 product, anything else (default 'N') the plain product.
!
! The OpenACC kernel is used only for the non-transposed product with both
! x and y of class psb_d_vect_oacc; every other combination is handled by
! the parent CSR spmm on the host arrays.
module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
class(psb_d_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n
character :: trans_
logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
! Reject vectors that are too short for the matrix dimensions.
if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
device_done = .false.
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (.not.tra) then
! Device path: requires both vectors to be OpenACC vectors.
select type(xx => x)
class is (psb_d_vect_oacc)
select type (yy => y)
class is (psb_d_vect_oacc)
! Synchronise any operand whose host copy is the current one.
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
call y%set_dev()
device_done = .true.
end select
end select
end if
if (.not.device_done) then
! Host fallback: synchronise device-current vectors, then use the parent
! CSR product; the result is flagged as host-current.
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%psb_d_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
call y%set_host()
end if
contains
! OpenACC kernel: for each row i, y(i) = alpha*sum_j val(j)*x(ja(j)) + beta*y(i)
! with j running over irp(i) .. irp(i+1)-1. Rows are processed in chunks
! of vsz by the outer loop; n is not used by the kernel.
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, n
real(psb_dpk_), intent(in) :: alpha, beta
real(psb_dpk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), irp(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, ii, isz
real(psb_dpk_) :: tmp
integer(psb_ipk_), parameter :: vsz = 256
info = 0
!$acc parallel loop vector_length(vsz) private(isz)
do ii = 1, m, vsz
! isz is the number of rows in this chunk (the last chunk may be short).
isz = min(vsz, m - ii + 1)
!$acc loop independent private(tmp)
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = irp(i), irp(i + 1) - 1
tmp = tmp + val(j) * x(ja(j))
end do
y(i) = alpha * tmp + beta * y(i)
end do
end do
end subroutine inner_spmv
end subroutine psb_d_oacc_csr_vect_mv
end submodule psb_d_oacc_csr_vect_mv_impl

@ -0,0 +1,35 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_allocate_mnnz_impl
  use psb_base_mod
contains

  ! Allocate the OpenACC ELL matrix a with m rows, n columns and room
  ! for nz entries.
  !
  !   m, n - matrix dimensions.
  !   a    - matrix to allocate; afterwards sync_dev_space is called and
  !          the host copy is flagged as current.
  !   nz   - optional size hint; 10 is used when absent.
  module subroutine psb_d_oacc_ell_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in)                   :: m, n
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional         :: nz

    integer(psb_ipk_)  :: info
    integer(psb_ipk_)  :: err_act, nz_
    character(len=20)  :: name='allocate_mnnz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    nz_ = 10
    if (present(nz)) nz_ = nz

    call a%psb_d_ell_sparse_mat%allocate(m, n, nz_)
    call a%sync_dev_space()
    call a%set_host()

    ! No call above reports a status; info still holds its initial value.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_d_oacc_ell_allocate_mnnz

end submodule psb_d_oacc_ell_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_coo_impl
  use psb_base_mod
contains

  ! Copy the COO matrix b into the OpenACC ELL matrix a.
  !
  !   a    - destination matrix; its device space is freed before the host
  !          conversion and rebuilt afterwards.
  !   b    - source COO matrix (not modified).
  !   info - psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_.
  module subroutine psb_d_oacc_ell_cp_from_coo(a, b, info)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(in)         :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_d_ell_sparse_mat%cp_from_coo(b, info)

    if (info == 0) then
      ! Rebuild the device space, flag the host copy as current, and sync.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
  end subroutine psb_d_oacc_ell_cp_from_coo

end submodule psb_d_oacc_ell_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_fmt_impl
  use psb_base_mod
contains

  ! Copy a matrix in any storage format into the OpenACC ELL matrix a.
  !
  !   a    - destination matrix.
  !   b    - source matrix (not modified); COO sources are routed through
  !          a%cp_from_coo, every other format through the parent ELL
  !          cp_from_fmt.
  !   info - status returned by the conversion that was used.
  module subroutine psb_d_oacc_ell_cp_from_fmt(a, b, info)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(in)        :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    select type (b)
    type is (psb_d_coo_sparse_mat)
      call a%cp_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
      ! Device space is only rebuilt when the host conversion succeeded.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_d_oacc_ell_cp_from_fmt

end submodule psb_d_oacc_ell_cp_from_fmt_impl

@ -0,0 +1,85 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_inner_vect_sv_impl
  use psb_base_mod
contains

  ! Solve step for an OpenACC ELL matrix, delegated to the host
  ! implementation of the parent ELL type (inner_spsm).
  !
  !   alpha, beta - scalar coefficients forwarded to inner_spsm.
  !   a           - the matrix; must be in the assembled state.
  !   x, y        - input and input/output vectors; both are synchronised
  !                 before the host solve and y is flagged host-current
  !                 afterwards.
  !   info        - psb_success_; psb_err_invalid_mat_state_ when a is not
  !                 assembled; psb_err_from_subroutine_ when inner_spsm
  !                 fails.
  !   trans       - optional, forwarded unchanged to inner_spsm.
  !
  ! Changes with respect to the previous version:
  !   * the OpenACC branch (taken for a non-transposed call with beta == 0
  !     and OpenACC vectors) did not perform a solve: it assigned
  !     yy%v(i) = alpha*val(i,j)*x(ja(i,j)) + beta*yy%v(i) inside the j
  !     loop, so only the last j survived, and it bounded j by a%nzt
  !     instead of the second extent of a%val. All calls now take the host
  !     path that was already used whenever tra or beta /= 0;
  !   * the routine name no longer gets truncated to 20 characters in
  !     error messages.
  !
  ! NOTE(review): a genuine device triangular solve would need a
  ! dedicated kernel; until one exists the host path is the only branch
  ! in this routine that calls a solver.
  module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in)                   :: alpha, beta
    class(psb_d_base_vect_type), intent(inout)   :: x, y
    integer(psb_ipk_), intent(out)               :: info
    character, optional, intent(in)              :: trans

    integer(psb_ipk_)           :: err_act
    character(len=*), parameter :: name = 'd_oacc_ell_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (.not. a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      call psb_error_handler(err_act)
      return
    end if

    call x%sync()
    call y%sync()
    call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
    call y%set_host()

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'ell_vect_sv')
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_ell_inner_vect_sv

end submodule psb_d_oacc_ell_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mold_impl
  use psb_base_mod
contains

  ! Make b an empty matrix of dynamic type psb_d_oacc_ell_sparse_mat.
  !
  !   a    - only selects this implementation; its contents are not read.
  !   b    - on entry, freed and deallocated if allocated; on exit,
  !          allocated with the OpenACC ELL dynamic type.
  !   info - psb_success_, or psb_err_alloc_dealloc_ when deallocation or
  !          allocation fails.
  module subroutine psb_d_oacc_ell_mold(a, b, info)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(in)             :: a
    class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out)                           :: info

    integer(psb_ipk_)  :: err_act
    character(len=20)  :: name = 'ell_mold'
    logical, parameter :: debug = .false.

    call psb_get_erraction(err_act)
    info = 0

    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    if (info == 0) then
      allocate(psb_d_oacc_ell_sparse_mat :: b, stat=info)
    end if

    if (info == psb_success_) return

    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
  end subroutine psb_d_oacc_ell_mold

end submodule psb_d_oacc_ell_mold_impl

@ -0,0 +1,27 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_coo_impl
  use psb_base_mod
contains

  ! Move the contents of the COO matrix b into the OpenACC ELL matrix a.
  !
  !   a    - destination matrix; its device space is freed before the host
  !          conversion and rebuilt afterwards.
  !   b    - source COO matrix, handed to the parent ELL mv_from_coo.
  !   info - psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_.
  module subroutine psb_d_oacc_ell_mv_from_coo(a, b, info)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(inout)      :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_d_ell_sparse_mat%mv_from_coo(b, info)

    if (info == 0) then
      ! Rebuild the device space, flag the host copy as current, and sync.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
  end subroutine psb_d_oacc_ell_mv_from_coo

end submodule psb_d_oacc_ell_mv_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_fmt_impl
  use psb_base_mod
contains

  ! Move the contents of a matrix in any storage format into the OpenACC
  ! ELL matrix a.
  !
  !   a    - destination matrix.
  !   b    - source matrix; COO sources are routed through a%mv_from_coo,
  !          every other format through the parent ELL mv_from_fmt.
  !   info - status returned by the conversion that was used.
  module subroutine psb_d_oacc_ell_mv_from_fmt(a, b, info)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(inout)     :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    select type (b)
    type is (psb_d_coo_sparse_mat)
      call a%mv_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
      ! Device space is only rebuilt when the host conversion succeeded.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_d_oacc_ell_mv_from_fmt

end submodule psb_d_oacc_ell_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_reallocate_nz_impl
  use psb_base_mod
contains

  ! Resize the storage of the OpenACC ELL matrix a for nz entries.
  !
  !   nz - requested size, forwarded to the parent ELL reallocate.
  !   a  - matrix to resize; afterwards sync_dev_space is called and the
  !        host copy is flagged as current.
  module subroutine psb_d_oacc_ell_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in)                   :: nz
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a

    integer(psb_ipk_)  :: info
    integer(psb_ipk_)  :: err_act
    character(len=20)  :: name='d_oacc_ell_reallocate_nz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_d_ell_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    ! No call above reports a status; info still holds its initial value.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_d_oacc_ell_reallocate_nz

end submodule psb_d_oacc_ell_reallocate_nz_impl

@ -0,0 +1,58 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scal_impl
  use psb_base_mod
contains

  ! Scale the OpenACC ELL matrix a by the diagonal matrix whose entries
  ! are stored in d, working on the device copy.
  !
  !   d    - scaling vector; indexed by row for a left scaling and by
  !          column index (a%ja) for a right scaling.
  !   a    - matrix to scale; synchronised first if the host copy is the
  !          current one, and flagged as device-current on exit.
  !   info - always psb_success_.
  !   side - optional, 'L' (rows, the default) or 'R' (columns); the
  !          comparison is case-insensitive. Any other value leaves the
  !          values untouched.
  !
  ! Changes with respect to the previous version:
  !   * the column loop was bounded by a%nzt; it is now bounded by the
  !     second extent of a%val, matching the SpMV kernel of this format,
  !     which uses size(a%ja,2);
  !   * when side was absent, entries were multiplied by d(j) with j the
  !     storage column; an absent side is now treated as 'L';
  !   * the 'R' branch skips entries whose column index is not positive,
  !     as the SpMV kernel does, so d is never indexed at 0;
  !   * the unreachable 9999 error block and unused locals were removed.
  !
  ! NOTE(review): present(d) requires the caller to have d resident on
  ! the device; this is unchanged from the original -- confirm callers.
  module subroutine psb_d_oacc_ell_scal(d, a, info, side)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                      :: d(:)
    integer(psb_ipk_), intent(out)                  :: info
    character, intent(in), optional                 :: side

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, m, nc
    character         :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    if (a%is_host()) call a%sync()
    m  = a%get_nrows()
    nc = size(a%val, 2)

    if (side_ == 'L') then
      !$acc parallel loop collapse(2) present(a, d)
      do i = 1, m
        do j = 1, nc
          a%val(i, j) = a%val(i, j) * d(i)
        end do
      end do
    else if (side_ == 'R') then
      !$acc parallel loop collapse(2) present(a, d)
      do i = 1, m
        do j = 1, nc
          if (a%ja(i, j) > 0) then
            a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
          end if
        end do
      end do
    end if

    call a%set_dev()
    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_ell_scal

end submodule psb_d_oacc_ell_scal_impl

@ -0,0 +1,39 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scals_impl
  use psb_base_mod
contains

  ! Multiply the stored values of the OpenACC ELL matrix a by the scalar
  ! d, working on the device copy.
  !
  !   d    - scalar factor.
  !   a    - matrix to scale; synchronised first if the host copy is the
  !          current one, and flagged as device-current on exit.
  !   info - always psb_success_.
  !
  ! Change with respect to the previous version: the column loop was
  ! bounded by a%nzt; it is now bounded by the second extent of a%val,
  ! matching the SpMV kernel of this format, which uses size(a%ja,2).
  ! The unreachable 9999 error block and unused locals were removed.
  module subroutine psb_d_oacc_ell_scals(d, a, info)
    implicit none
    class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                      :: d
    integer(psb_ipk_), intent(out)                  :: info

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, m, nc

    info = psb_success_
    call psb_erractionsave(err_act)

    if (a%is_host()) call a%sync()
    m  = a%get_nrows()
    nc = size(a%val, 2)

    !$acc parallel loop collapse(2) present(a)
    do i = 1, m
      do j = 1, nc
        a%val(i, j) = a%val(i, j) * d
      end do
    end do

    call a%set_dev()
    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_ell_scals

end submodule psb_d_oacc_ell_scals_impl

@ -0,0 +1,90 @@
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_vect_mv_impl
use psb_base_mod
contains
! Sparse matrix-vector product y = alpha*op(A)*x + beta*y for an OpenACC
! ELL matrix.
!
!   alpha, beta - scalar coefficients.
!   a           - the matrix.
!   x, y        - input and input/output vectors.
!   info        - psb_success_, or psb_err_invalid_mat_state_ when x or y
!                 is shorter than the matrix dimensions require; otherwise
!                 whatever the kernel or the host spmm returns.
!   trans       - optional; 'T' or 'C' (any case) selects the transposed
!                 product, anything else (default 'N') the plain product.
!
! The OpenACC kernel is used only for the non-transposed product with both
! x and y of class psb_d_vect_oacc; every other combination is handled by
! the parent ELL spmm on the host arrays.
module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
class(psb_d_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nzt, nc
character :: trans_
logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
nzt = a%nzt
! nc is the number of stored columns per row; it bounds the kernel's j loop.
nc = size(a%ja,2)
! Reject vectors that are too short for the matrix dimensions.
if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
device_done = .false.
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (.not.tra) then
! Device path: requires both vectors to be OpenACC vectors.
select type(xx => x)
class is (psb_d_vect_oacc)
select type (yy => y)
class is (psb_d_vect_oacc)
! Synchronise any operand whose host copy is the current one.
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
call y%set_dev()
device_done = .true.
end select
end select
end if
if (.not.device_done) then
! Host fallback: synchronise device-current vectors, then use the parent
! ELL product; the result is flagged as host-current.
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%psb_d_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
call y%set_host()
end if
contains
! OpenACC kernel: for each row i,
! y(i) = alpha*sum_{j=1..nc} val(i,j)*x(ja(i,j)) + beta*y(i), where entries
! with a non-positive column index are skipped. Rows are processed in
! chunks of vsz by the outer loop; n is not used by the kernel.
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, n, nc
real(psb_dpk_), intent(in) :: alpha, beta
real(psb_dpk_) :: val(:,:), x(:), y(:)
integer(psb_ipk_) :: ja(:,:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, ii, isz
real(psb_dpk_) :: tmp
integer(psb_ipk_), parameter :: vsz = 256
info = 0
!$acc parallel loop vector_length(vsz) private(isz)
do ii = 1, m, vsz
! isz is the number of rows in this chunk (the last chunk may be short).
isz = min(vsz, m - ii + 1)
!$acc loop independent private(tmp)
do i = ii, ii + isz - 1
tmp = 0.0_psb_dpk_
!$acc loop seq
do j = 1, nc
if (ja(i,j) > 0) then
tmp = tmp + val(i,j) * x(ja(i,j))
end if
end do
y(i) = alpha * tmp + beta * y(i)
end do
end do
end subroutine inner_spmv
end subroutine psb_d_oacc_ell_vect_mv
end submodule psb_d_oacc_ell_vect_mv_impl

@ -0,0 +1,36 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_allocate_mnnz_impl
  use psb_base_mod
contains

  ! Allocate the OpenACC HLL matrix a with m rows, n columns and room
  ! for nz entries.
  !
  !   m, n - matrix dimensions.
  !   a    - matrix to allocate; afterwards sync_dev_space is called and
  !          the host copy is flagged as current.
  !   nz   - optional size hint; 10 is used when absent.
  module subroutine psb_d_oacc_hll_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in)                   :: m, n
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional         :: nz

    integer(psb_ipk_)  :: info
    integer(psb_ipk_)  :: err_act, nz_
    character(len=20)  :: name='allocate_mnnz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    nz_ = 10
    if (present(nz)) nz_ = nz

    call a%psb_d_hll_sparse_mat%allocate(m, n, nz_)
    call a%sync_dev_space()
    call a%set_host()

    ! No call above reports a status; info still holds its initial value.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_d_oacc_hll_allocate_mnnz

end submodule psb_d_oacc_hll_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl
  use psb_base_mod
contains

  ! Copy the COO matrix b into the OpenACC HLL matrix a.
  !
  !   a    - destination matrix; its device space is freed before the host
  !          conversion and rebuilt afterwards.
  !   b    - source COO matrix (not modified).
  !   info - psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_.
  module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(in)         :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_d_hll_sparse_mat%cp_from_coo(b, info)

    if (info == 0) then
      ! Rebuild the device space, flag the host copy as current, and sync.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
  end subroutine psb_d_oacc_hll_cp_from_coo

end submodule psb_d_oacc_hll_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_fmt_impl
  use psb_base_mod
contains

  ! Copy a matrix in any storage format into the OpenACC HLL matrix a.
  !
  !   a    - destination matrix.
  !   b    - source matrix (not modified); COO sources are routed through
  !          a%cp_from_coo, every other format through the parent HLL
  !          cp_from_fmt.
  !   info - status returned by the conversion that was used.
  module subroutine psb_d_oacc_hll_cp_from_fmt(a, b, info)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(in)        :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    select type (b)
    type is (psb_d_coo_sparse_mat)
      call a%cp_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
      ! Device space is only rebuilt when the host conversion succeeded.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_d_oacc_hll_cp_from_fmt

end submodule psb_d_oacc_hll_cp_from_fmt_impl

@ -0,0 +1,86 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_inner_vect_sv_impl
  use psb_base_mod
contains

  ! Solve step for an OpenACC HLL matrix, delegated to the host
  ! implementation of the parent HLL type (inner_spsm).
  !
  !   alpha, beta - scalar coefficients forwarded to inner_spsm.
  !   a           - the matrix; must be in the assembled state.
  !   x, y        - input and input/output vectors; both are synchronised
  !                 before the host solve and y is flagged host-current
  !                 afterwards.
  !   info        - psb_success_; psb_err_invalid_mat_state_ when a is not
  !                 assembled; psb_err_from_subroutine_ when inner_spsm
  !                 fails.
  !   trans       - optional, forwarded unchanged to inner_spsm.
  !
  ! Changes with respect to the previous version:
  !   * the OpenACC branch (taken for a non-transposed call with beta == 0
  !     and OpenACC vectors) did not perform a solve: it overwrote
  !     yy%v(a%irn(j)) once per stored entry, indexed a%irn by entry
  !     position, and walked hkoffs(i) .. hkoffs(i+1)-1 whereas the SpMV
  !     kernel of this format reads val(hkoffs(i)+ii) with ii >= 1. All
  !     calls now take the host path that was already used whenever tra
  !     or beta /= 0;
  !   * the routine name no longer gets truncated to 20 characters in
  !     error messages.
  !
  ! NOTE(review): a genuine device triangular solve would need a
  ! dedicated kernel; until one exists the host path is the only branch
  ! in this routine that calls a solver.
  module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
    real(psb_dpk_), intent(in)                   :: alpha, beta
    class(psb_d_base_vect_type), intent(inout)   :: x, y
    integer(psb_ipk_), intent(out)               :: info
    character, optional, intent(in)              :: trans

    integer(psb_ipk_)           :: err_act
    character(len=*), parameter :: name = 'd_oacc_hll_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (.not. a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      call psb_error_handler(err_act)
      return
    end if

    call x%sync()
    call y%sync()
    call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
    call y%set_host()

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'hll_vect_sv')
      call psb_error_handler(err_act)
      return
    end if

    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_hll_inner_vect_sv

end submodule psb_d_oacc_hll_inner_vect_sv_impl

@ -0,0 +1,34 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mold_impl
  use psb_base_mod
contains

  ! Make b an empty matrix of dynamic type psb_d_oacc_hll_sparse_mat.
  !
  !   a    - only selects this implementation; its contents are not read.
  !   b    - on entry, freed and deallocated if allocated; on exit,
  !          allocated with the OpenACC HLL dynamic type.
  !   info - psb_success_, or psb_err_alloc_dealloc_ when deallocation or
  !          allocation fails.
  module subroutine psb_d_oacc_hll_mold(a, b, info)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(in)             :: a
    class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out)                           :: info

    integer(psb_ipk_)  :: err_act
    character(len=20)  :: name = 'hll_mold'
    logical, parameter :: debug = .false.

    call psb_get_erraction(err_act)
    info = 0

    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    if (info == 0) then
      allocate(psb_d_oacc_hll_sparse_mat :: b, stat=info)
    end if

    if (info == psb_success_) return

    info = psb_err_alloc_dealloc_
    call psb_errpush(info, name)
    call psb_error_handler(err_act)
  end subroutine psb_d_oacc_hll_mold

end submodule psb_d_oacc_hll_mold_impl

@ -0,0 +1,27 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_coo_impl
  use psb_base_mod
contains

  ! Move the contents of the COO matrix b into the OpenACC HLL matrix a.
  !
  !   a    - destination matrix; its device space is freed before the host
  !          conversion and rebuilt afterwards.
  !   b    - source COO matrix, handed to the parent HLL mv_from_coo.
  !   info - psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_.
  module subroutine psb_d_oacc_hll_mv_from_coo(a, b, info)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_d_coo_sparse_mat), intent(inout)      :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    call a%free_dev_space()
    call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)

    if (info == 0) then
      ! Rebuild the device space, flag the host copy as current, and sync.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
  end subroutine psb_d_oacc_hll_mv_from_coo

end submodule psb_d_oacc_hll_mv_from_coo_impl

@ -0,0 +1,25 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_fmt_impl
  use psb_base_mod
contains

  ! Move the contents of a matrix in any storage format into the OpenACC
  ! HLL matrix a.
  !
  !   a    - destination matrix.
  !   b    - source matrix; COO sources are routed through a%mv_from_coo,
  !          every other format through the parent HLL mv_from_fmt.
  !   info - status returned by the conversion that was used.
  module subroutine psb_d_oacc_hll_mv_from_fmt(a, b, info)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    class(psb_d_base_sparse_mat), intent(inout)     :: b
    integer(psb_ipk_), intent(out)                  :: info

    info = psb_success_

    select type (b)
    type is (psb_d_coo_sparse_mat)
      call a%mv_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
      ! Device space is only rebuilt when the host conversion succeeded.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_d_oacc_hll_mv_from_fmt

end submodule psb_d_oacc_hll_mv_from_fmt_impl

@ -0,0 +1,29 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_reallocate_nz_impl
  use psb_base_mod
contains

  ! Resize the storage of the OpenACC HLL matrix a for nz entries.
  !
  !   nz - requested size, forwarded to the parent HLL reallocate.
  !   a  - matrix to resize; afterwards sync_dev_space is called and the
  !        host copy is flagged as current.
  module subroutine psb_d_oacc_hll_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in)                   :: nz
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a

    integer(psb_ipk_)  :: info
    integer(psb_ipk_)  :: err_act
    character(len=20)  :: name='d_oacc_hll_reallocate_nz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_d_hll_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    ! No call above reports a status; info still holds its initial value.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_d_oacc_hll_reallocate_nz

end submodule psb_d_oacc_hll_reallocate_nz_impl

@ -0,0 +1,62 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scal_impl
  use psb_base_mod
contains

  ! Scale the OpenACC HLL matrix a by the diagonal matrix whose entries
  ! are stored in d, working on the device copy.
  !
  !   d    - scaling vector; indexed by row for a left scaling and by
  !          column index (a%ja) for a right scaling.
  !   a    - matrix to scale; synchronised first if the host copy is the
  !          current one, and flagged as device-current on exit.
  !   info - always psb_success_.
  !   side - optional, 'L' (rows, the default) or 'R' (columns); the
  !          comparison is case-insensitive. Any other value leaves the
  !          values untouched.
  !
  ! Storage layout used here is the one the SpMV kernel of this format
  ! relies on: the entries of hack i occupy val(hkoffs(i)+1 : hkoffs(i+1)),
  ! and inside a hack the entry at offset ii + (jj-1)*hksz belongs to
  ! local row ii, i.e. global row (i-1)*hksz + ii.
  !
  ! Changes with respect to the previous version:
  !   * the entry loop ran over hkoffs(i) .. hkoffs(i+1)-1, one position
  !     too low compared with the SpMV kernel; it now runs over
  !     hkoffs(i)+1 .. hkoffs(i+1);
  !   * the row of an entry was derived by dividing its offset by a%nzt;
  !     it is now derived from the offset modulo hksz;
  !   * entries belonging to rows beyond get_nrows() (the SpMV kernel
  !     never touches them) are skipped so d is not indexed out of range;
  !   * when side was absent, d was indexed by the offset inside the hack;
  !     an absent side is now treated as 'L';
  !   * the unreachable 9999 error block and unused locals were removed.
  !
  ! NOTE(review): present(d) requires the caller to have d resident on
  ! the device; this is unchanged from the original -- confirm callers.
  module subroutine psb_d_oacc_hll_scal(d, a, info, side)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                      :: d(:)
    integer(psb_ipk_), intent(out)                  :: info
    character, intent(in), optional                 :: side

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, k, m, hksz, nhacks
    character         :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    if (a%is_host()) call a%sync()
    hksz   = a%hksz
    m      = a%get_nrows()
    nhacks = (m + hksz - 1) / hksz

    if (side_ == 'L') then
      !$acc parallel loop present(a, d) private(k)
      do i = 1, nhacks
        do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
          ! k is the global row that owns entry j.
          k = (i - 1) * hksz + mod(j - a%hkoffs(i) - 1, hksz) + 1
          if (k <= m) a%val(j) = a%val(j) * d(k)
        end do
      end do
    else if (side_ == 'R') then
      !$acc parallel loop present(a, d) private(k)
      do i = 1, nhacks
        do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
          k = (i - 1) * hksz + mod(j - a%hkoffs(i) - 1, hksz) + 1
          if (k <= m) a%val(j) = a%val(j) * d(a%ja(j))
        end do
      end do
    end if

    call a%set_dev()
    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_hll_scal

end submodule psb_d_oacc_hll_scal_impl

@ -0,0 +1,40 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scals_impl
  use psb_base_mod
contains

  ! Multiply the stored values of the OpenACC HLL matrix a by the scalar
  ! d, working on the device copy.
  !
  !   d    - scalar factor.
  !   a    - matrix to scale; synchronised first if the host copy is the
  !          current one, and flagged as device-current on exit.
  !   info - always psb_success_.
  !
  ! Change with respect to the previous version: the entry loop ran over
  ! hkoffs(i) .. hkoffs(i+1)-1, whereas the SpMV kernel of this format
  ! reads the entries of hack i at val(hkoffs(i)+1 : hkoffs(i+1)). The
  ! loop now uses that range, so the position before the first hack is no
  ! longer touched and the last entry of every hack is no longer missed.
  ! The unreachable 9999 error block and unused locals were removed.
  module subroutine psb_d_oacc_hll_scals(d, a, info)
    implicit none
    class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
    real(psb_dpk_), intent(in)                      :: d
    integer(psb_ipk_), intent(out)                  :: info

    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, hksz, nhacks

    info = psb_success_
    call psb_erractionsave(err_act)

    if (a%is_host()) call a%sync()
    hksz   = a%hksz
    nhacks = (a%get_nrows() + hksz - 1) / hksz

    !$acc parallel loop present(a)
    do i = 1, nhacks
      do j = a%hkoffs(i) + 1, a%hkoffs(i + 1)
        a%val(j) = a%val(j) * d
      end do
    end do

    call a%set_dev()
    call psb_erractionrestore(err_act)
  end subroutine psb_d_oacc_hll_scals

end submodule psb_d_oacc_hll_scals_impl

@ -0,0 +1,90 @@
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_vect_mv_impl
use psb_base_mod
contains
! Sparse matrix-vector product y = alpha*op(A)*x + beta*y for an OpenACC
! HLL matrix.
!
!   alpha, beta - scalar coefficients.
!   a           - the matrix.
!   x, y        - input and input/output vectors.
!   info        - psb_success_, or psb_err_invalid_mat_state_ when x or y
!                 is shorter than the matrix dimensions require; otherwise
!                 whatever the kernel or the host spmm returns.
!   trans       - optional; 'T' or 'C' (any case) selects the transposed
!                 product, anything else (default 'N') the plain product.
!
! The OpenACC kernel is used only for the non-transposed product with both
! x and y of class psb_d_vect_oacc; every other combination is handled by
! the parent HLL spmm on the host arrays.
module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
implicit none
real(psb_dpk_), intent(in) :: alpha, beta
class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
class(psb_d_base_vect_type), intent(inout) :: x, y
integer(psb_ipk_), intent(out) :: info
character, optional, intent(in) :: trans
integer(psb_ipk_) :: m, n, nhacks, hksz
character :: trans_
logical :: device_done, tra
info = psb_success_
m = a%get_nrows()
n = a%get_ncols()
! hkoffs holds one offset per hack plus a closing one.
nhacks = size(a%hkoffs) - 1
hksz = a%hksz
! Reject vectors that are too short for the matrix dimensions.
if ((n > size(x%v)) .or. (m > size(y%v))) then
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
info = psb_err_invalid_mat_state_
return
end if
device_done = .false.
if (present(trans)) then
trans_ = trans
else
trans_ = 'N'
end if
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
if (.not.tra) then
! Device path: requires both vectors to be OpenACC vectors.
select type(xx => x)
class is (psb_d_vect_oacc)
select type (yy => y)
class is (psb_d_vect_oacc)
! Synchronise any operand whose host copy is the current one.
if (a%is_host()) call a%sync()
if (xx%is_host()) call xx%sync()
if (yy%is_host()) call yy%sync()
call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
call y%set_dev()
device_done = .true.
end select
end select
end if
if (.not.device_done) then
! Host fallback: synchronise device-current vectors, then use the parent
! HLL product; the result is flagged as host-current.
if (x%is_dev()) call x%sync()
if (y%is_dev()) call y%sync()
call a%psb_d_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
call y%set_host()
end if
contains
! OpenACC kernel, one outer iteration per hack. For hack i the entries
! start right after hkoffs(i); local row ii reads val/ja at positions
! hkoffs(i)+ii, hkoffs(i)+ii+hksz, ... (nlc terms) and updates
! y(ir+ii) = alpha*sum + beta*y(ir+ii), where ir = (i-1)*hksz.
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
implicit none
integer(psb_ipk_) :: m, nhacks, hksz
real(psb_dpk_), intent(in) :: alpha, beta
real(psb_dpk_) :: val(:), x(:), y(:)
integer(psb_ipk_) :: ja(:), hkoffs(:)
integer(psb_ipk_), intent(out) :: info
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
real(psb_dpk_) :: tmp
info = 0
!$acc parallel loop private(nlc, isz,ir,nr)
do i = 1, nhacks
! isz: entries stored for this hack; nlc: stored entries per row.
isz = hkoffs(i + 1) - hkoffs(i)
nlc = isz/hksz
! ir: global row preceding the hack; nr: rows of the hack that lie
! inside the matrix (the last hack may be partial).
ir = (i-1)*hksz
nr = min(hksz,m-ir)
!$acc loop independent private(tmp,ii,ipnt)
do ii = 1, nr
ipnt = hkoffs(i) + ii
tmp = dzero
!$acc loop seq
do j = 1, nlc
tmp = tmp + val(ipnt) * x(ja(ipnt))
! Consecutive entries of one row are hksz positions apart.
ipnt = ipnt + hksz
end do
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
end do
end do
end subroutine inner_spmv
end subroutine psb_d_oacc_hll_vect_mv
end submodule psb_d_oacc_hll_vect_mv_impl

@ -0,0 +1,46 @@
! Element-wise product y(i) = x(i) * y(i) for the first n entries, where
! n is the smaller of the two vectors' row counts.
!
!   x    - input vector; when it is an OpenACC vector the product is
!          computed by an OpenACC kernel, otherwise on the host.
!   y    - OpenACC vector updated in place; flagged device-current after
!          the kernel path and host-current after the host path.
!   info - always 0.
subroutine psb_d_oacc_mlt_v(x, y, info)
  use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v
  implicit none
  class(psb_d_base_vect_type), intent(inout) :: x
  class(psb_d_vect_oacc), intent(inout)      :: y
  integer(psb_ipk_), intent(out)             :: info

  integer(psb_ipk_) :: n

  info = 0
  n = min(x%get_nrows(), y%get_nrows())

  select type (xx => x)
  class is (psb_d_vect_oacc)
    ! Kernel path: synchronise whichever operand is host-current first.
    if (y%is_host()) call y%sync()
    if (xx%is_host()) call xx%sync()
    call multiply_on_device(n, xx%v, y%v)
    call y%set_dev()

  class default
    ! Host path: synchronise whichever operand is device-current first.
    if (xx%is_dev()) call xx%sync()
    if (y%is_dev()) call y%sync()
    y%v(1:n) = y%v(1:n) * xx%v(1:n)
    call y%set_host()
  end select

contains

  ! OpenACC kernel for the product; both arrays must be present on the
  ! device (see the present clause).
  subroutine multiply_on_device(nrows, xv, yv)
    implicit none
    integer(psb_ipk_), intent(in) :: nrows
    real(psb_dpk_), intent(inout) :: xv(:), yv(:)
    integer(psb_ipk_) :: k

    !$acc parallel loop present(xv,yv)
    do k = 1, nrows
      yv(k) = (xv(k)) * (yv(k))
    end do
  end subroutine multiply_on_device

end subroutine psb_d_oacc_mlt_v

@ -0,0 +1,91 @@
! Element-wise update z(i) = alpha * x(i) * y(i) + beta * z(i) for the
! first n entries, where n is the smallest of the three row counts.
!
!   alpha, beta    - scalar coefficients.
!   x, y           - input vectors; the OpenACC kernel is used only when
!                    both are OpenACC vectors, otherwise the host loop is.
!   z              - OpenACC vector updated in place; flagged
!                    device-current after the kernel path and host-current
!                    after the host path.
!   info           - always 0 on return.
!   conjgx, conjgy - optional, 'C' (any case) requests conjugation of the
!                    corresponding operand. For real data every
!                    combination evaluates the same expression, so the
!                    flags do not change the result.
!
! Changes with respect to the previous version:
!   * the inner kernel routine declared info as intent(out) but never
!     assigned it, leaving the caller's info undefined after the device
!     path; it is now set to 0;
!   * z is deliberately not synchronised when beta == 0, yet the old code
!     still read it (beta * z(i)); with beta == 0 the stale contents are
!     no longer read, so a NaN/Inf left in z cannot leak into the result;
!   * the four branches on conjgx/conjgy were textually identical and
!     have been collapsed into one.
subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
  use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2
  use psb_string_mod
  implicit none
  real(psb_dpk_), intent(in)                 :: alpha, beta
  class(psb_d_base_vect_type), intent(inout) :: x
  class(psb_d_base_vect_type), intent(inout) :: y
  class(psb_d_vect_oacc), intent(inout)      :: z
  integer(psb_ipk_), intent(out)             :: info
  character(len=1), intent(in), optional     :: conjgx, conjgy

  integer(psb_ipk_) :: n
  logical           :: conjgx_, conjgy_, device_done

  conjgx_     = .false.
  conjgy_     = .false.
  device_done = .false.
  if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
  if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')

  n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
  info = 0

  select type (xx => x)
  class is (psb_d_vect_oacc)
    select type (yy => y)
    class is (psb_d_vect_oacc)
      if (xx%is_host()) call xx%sync()
      if (yy%is_host()) call yy%sync()
      ! z only needs to be current on the device when it is read.
      if ((beta /= dzero) .and. (z%is_host())) call z%sync()
      call d_inner_oacc_mlt_v_2(n, alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
      call z%set_dev()
      device_done = .true.
    end select
  end select

  if (.not. device_done) then
    if (x%is_dev()) call x%sync()
    if (y%is_dev()) call y%sync()
    if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
    if (beta == dzero) then
      z%v(1:n) = alpha * (x%v(1:n)) * (y%v(1:n))
    else
      z%v(1:n) = alpha * (x%v(1:n)) * (y%v(1:n)) + beta * z%v(1:n)
    end if
    call z%set_host()
  end if

contains

  ! OpenACC kernel for the update; x, y and z must be present on the
  ! device (see the present clauses). conjgx/conjgy are accepted for
  ! interface compatibility and have no effect on real data.
  subroutine d_inner_oacc_mlt_v_2(n, alpha, x, y, beta, z, info, conjgx, conjgy)
    implicit none
    integer(psb_ipk_), intent(in)  :: n
    real(psb_dpk_), intent(in)     :: alpha, beta
    real(psb_dpk_), intent(inout)  :: x(:), y(:), z(:)
    integer(psb_ipk_), intent(out) :: info
    logical, intent(in)            :: conjgx, conjgy
    integer(psb_ipk_) :: i

    info = 0
    if (beta == dzero) then
      !$acc parallel loop present(x,y,z)
      do i = 1, n
        z(i) = alpha * (x(i)) * (y(i))
      end do
    else
      !$acc parallel loop present(x,y,z)
      do i = 1, n
        z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
      end do
    end if
  end subroutine d_inner_oacc_mlt_v_2

end subroutine psb_d_oacc_mlt_v_2

@ -0,0 +1,29 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_allocate_mnnz_impl
  use psb_base_mod
contains

  ! Allocate the single-precision OpenACC CSR matrix a with m rows and
  ! n columns.
  !
  !   m, n - matrix dimensions.
  !   a    - matrix to allocate; afterwards the host copy is flagged as
  !          current and sync_dev_space is called.
  !   nz   - optional size hint, forwarded as-is (present or absent) to
  !          the parent CSR allocate.
  module subroutine psb_s_oacc_csr_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in)                   :: m, n
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional         :: nz

    integer(psb_ipk_)  :: info
    integer(psb_ipk_)  :: err_act, nz_
    character(len=20)  :: name='allocate_mnz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_s_csr_sparse_mat%allocate(m, n, nz)
    call a%set_host()
    call a%sync_dev_space()

    ! No call above reports a status; info still holds its initial value.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
  end subroutine psb_s_oacc_csr_allocate_mnnz

end submodule psb_s_oacc_csr_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_coo_impl
  use psb_base_mod
contains

  ! Build the OpenACC CSR matrix a as a copy of the COO matrix b.
  !
  !   a      destination; its device space is released first
  !   b      source matrix, left unchanged
  !   info   psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_
  module subroutine psb_s_oacc_csr_cp_from_coo(a, b, info)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_s_coo_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)

    if (info == 0) then
      ! Conversion succeeded: recreate the device space and synchronize.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_s_oacc_csr_cp_from_coo

end submodule psb_s_oacc_csr_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_fmt_impl
  use psb_base_mod
contains

  ! Build the OpenACC CSR matrix a as a copy of b, whatever b's storage
  ! format is.
  !
  !   a      destination matrix
  !   b      source matrix, left unchanged
  !   info   return code of the conversion that was actually performed
  !
  ! A COO source is delegated to cp_from_coo; any other format goes through
  ! the parent CSR cp_from_fmt followed by the device-side refresh.
  module subroutine psb_s_oacc_csr_cp_from_fmt(a, b, info)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_s_base_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type(b)
    type is (psb_s_coo_sparse_mat)
      call a%cp_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
      ! On failure info is handed back as is and the device is not touched.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_s_oacc_csr_cp_from_fmt

end submodule psb_s_oacc_csr_cp_from_fmt_impl

@ -0,0 +1,83 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_inner_vect_sv_impl
  use psb_base_mod
contains

  ! Triangular solve  y = alpha * op(A)^{-1} x + beta * y  for an OpenACC
  ! CSR matrix, with encapsulated vectors.
  !
  !   alpha, beta  scalar coefficients
  !   a            matrix; must be assembled, otherwise
  !                psb_err_invalid_mat_state_ is raised
  !   x, y         input and input/output vectors
  !   info         psb_success_, or psb_err_from_subroutine_ when the
  !                underlying solve fails
  !   trans        optional 'N' (default), 'T' or 'C'
  !
  ! The earlier device branch was an element-wise loop over the nonzeros
  ! that wrote y(i) for i = 1..size(a%val): not a triangular solve, and an
  ! out-of-bounds access as soon as nnz exceeds the vector length.  There is
  ! no device solve kernel in this routine, so OpenACC vectors now take the
  ! host solve of the parent CSR class.
  ! NOTE(review): the host solve reads the host copy of a; confirm it is
  ! always current for matrices that reach this routine.
  module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
    real(psb_spk_), intent(in) :: alpha, beta
    class(psb_s_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans
    real(psb_spk_), allocatable :: rx(:), ry(:)
    logical :: tra, both_oacc
    character :: trans_
    integer(psb_ipk_) :: err_act
    ! A len=20 variable would truncate this 24-character name.
    character(len=*), parameter :: name = 's_oacc_csr_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (present(trans)) then
      trans_ = trans
    else
      trans_ = 'N'
    end if

    if (.not.a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      goto 9999
    endif

    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')

    ! Are both vectors OpenACC vectors?
    both_oacc = .false.
    select type (x)
    type is (psb_s_vect_oacc)
      select type (y)
      type is (psb_s_vect_oacc)
        both_oacc = .true.
      end select
    end select

    if (tra .or. (beta /= szero) .or. both_oacc) then
      ! Host solve on the encapsulated vectors: synchronize both, solve,
      ! then flag y as modified on the host.
      call x%sync()
      call y%sync()
      call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
      call y%set_host()
    else
      ! Other vector types: extract plain arrays, solve, rebuild y.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
      call y%bld(ry)
    endif

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'csrg_vect_sv')
      goto 9999
    endif

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return
  end subroutine psb_s_oacc_csr_inner_vect_sv

end submodule psb_s_oacc_csr_inner_vect_sv_impl

@ -0,0 +1,35 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mold_impl
  use psb_base_mod
contains

  ! Make b a freshly allocated object of dynamic type
  ! psb_s_oacc_csr_sparse_mat.
  !
  !   a      only supplies the dynamic type through dispatch; never read
  !   b      freed and deallocated if already allocated, then allocated anew
  !   info   0 on success, psb_err_alloc_dealloc_ if either the deallocate
  !          or the allocate fails
  module subroutine psb_s_oacc_csr_mold(a, b, info)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
    class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: err_act
    character(len=20) :: name='csr_mold'
    logical, parameter :: debug=.false.

    call psb_get_erraction(err_act)
    info = 0

    ! Dispose of whatever b currently holds.
    if (allocated(b)) then
      call b%free()
      deallocate(b, stat=info)
    end if

    ! Allocate only if the deallocation (if any) went through.
    if (info == 0) allocate(psb_s_oacc_csr_sparse_mat :: b, stat=info)

    if (info /= psb_success_) then
      info = psb_err_alloc_dealloc_
      call psb_errpush(info, name)
      call psb_error_handler(err_act)
    end if
    return
  end subroutine psb_s_oacc_csr_mold

end submodule psb_s_oacc_csr_mold_impl

@ -0,0 +1,27 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_coo_impl
  use psb_base_mod
contains

  ! Build the OpenACC CSR matrix a from the COO matrix b through the parent
  ! class's mv_from_coo.
  !
  !   a      destination; its device space is released first
  !   b      source matrix, passed intent(inout) to the parent mv_from_coo
  !   info   psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_
  module subroutine psb_s_oacc_csr_mv_from_coo(a, b, info)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_s_coo_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)

    if (info == 0) then
      ! Conversion succeeded: recreate the device space and synchronize.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_s_oacc_csr_mv_from_coo

end submodule psb_s_oacc_csr_mv_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_fmt_impl
  use psb_base_mod
contains

  ! Build the OpenACC CSR matrix a from b, whatever b's storage format is,
  ! using the mv_from_* family of conversions.
  !
  !   a      destination matrix
  !   b      source matrix, passed intent(inout) to the conversion
  !   info   return code of the conversion that was actually performed
  !
  ! A COO source is delegated to mv_from_coo; any other format goes through
  ! the parent CSR mv_from_fmt followed by the device-side refresh.
  module subroutine psb_s_oacc_csr_mv_from_fmt(a, b, info)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    class(psb_s_base_sparse_mat), intent(inout) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type(b)
    type is (psb_s_coo_sparse_mat)
      call a%mv_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
      ! On failure info is handed back as is and the device is not touched.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_s_oacc_csr_mv_from_fmt

end submodule psb_s_oacc_csr_mv_from_fmt_impl

@ -0,0 +1,28 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_reallocate_nz_impl
  use psb_base_mod
contains

  ! Resize the storage of an OpenACC CSR matrix.
  !
  !   nz   size request, forwarded to the parent CSR reallocate
  !   a    matrix being resized
  !
  ! After the host-side reallocation, sync_dev_space() and set_host() are
  ! invoked, in that order.
  module subroutine psb_s_oacc_csr_reallocate_nz(nz, a)
    implicit none
    integer(psb_ipk_), intent(in) :: nz
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    integer(psb_ipk_) :: info
    integer(psb_ipk_) :: err_act
    character(len=20) :: name='s_oacc_csr_reallocate_nz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    call a%psb_s_csr_sparse_mat%reallocate(nz)
    call a%sync_dev_space()
    call a%set_host()

    ! None of the calls above reports through info, so the success branch
    ! is the one taken.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
    return
  end subroutine psb_s_oacc_csr_reallocate_nz

end submodule psb_s_oacc_csr_reallocate_nz_impl

@ -0,0 +1,53 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scal_impl
  use psb_base_mod
contains

  ! Scale an OpenACC CSR matrix by a diagonal, on the device.
  !
  !   d      diagonal entries: indexed by row for side 'L', by column for
  !          side 'R'
  !   a      matrix, scaled in place; flagged with set_dev() on exit
  !   info   psb_success_ on exit
  !   side   optional, case-insensitive: 'L' scales rows (A <- diag(d) A),
  !          'R' scales columns (A <- A diag(d)).  Defaults to 'L'.
  !          Any other value leaves the coefficients unscaled.
  !
  ! Fixes over the previous version:
  !   * the 'R' branch iterated the row pointers up to get_ncols(); CSR row
  !     pointers are indexed by row, so the bound is now get_nrows();
  !   * with side absent the code multiplied val(i) by d(i) over all
  !     nonzeros, indexing d by nonzero position; it now does row scaling
  !     (NOTE(review): assumed to match the host CSR default - confirm);
  !   * d is a caller-supplied array with no guarantee of being on the
  !     device, so it is passed with copyin instead of present.
  module subroutine psb_s_oacc_csr_scal(d, a, info, side)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    real(psb_spk_), intent(in) :: d(:)
    integer(psb_ipk_), intent(out) :: info
    character, intent(in), optional :: side
    integer(psb_ipk_) :: err_act
    integer(psb_ipk_) :: i, j, nr
    character :: side_

    info = psb_success_
    call psb_erractionsave(err_act)

    side_ = 'L'
    if (present(side)) side_ = psb_toupper(side)

    ! Bring the device copy up to date before modifying it there.
    if (a%is_host()) call a%sync()

    ! Row count evaluated once on the host and used as loop bound.
    nr = a%get_nrows()

    select case (side_)
    case ('L')
      !$acc parallel loop present(a) copyin(d)
      do i = 1, nr
        do j = a%irp(i), a%irp(i+1) - 1
          a%val(j) = a%val(j) * d(i)
        end do
      end do

    case ('R')
      !$acc parallel loop present(a) copyin(d)
      do i = 1, nr
        do j = a%irp(i), a%irp(i+1) - 1
          a%val(j) = a%val(j) * d(a%ja(j))
        end do
      end do

    case default
      ! Unrecognized side: coefficients are left as they are.
      continue
    end select

    call a%set_dev()
    call psb_erractionrestore(err_act)
    return
  end subroutine psb_s_oacc_csr_scal

end submodule psb_s_oacc_csr_scal_impl

@ -0,0 +1,34 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scals_impl
  use psb_base_mod
contains

  ! Multiply every stored coefficient of an OpenACC CSR matrix by the
  ! scalar d, on the device.
  !
  !   d      scaling factor
  !   a      matrix, scaled in place; flagged with set_dev() on exit
  !   info   psb_success_ on exit
  module subroutine psb_s_oacc_csr_scals(d, a, info)
    implicit none
    class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
    real(psb_spk_), intent(in) :: d
    integer(psb_ipk_), intent(out) :: info
    integer(psb_ipk_) :: err_act
    character(len=20) :: name='scal'
    logical, parameter :: debug=.false.
    integer(psb_ipk_) :: k

    info = psb_success_
    call psb_erractionsave(err_act)

    ! Synchronize first when the matrix reports itself as host-side.
    if (a%is_host()) call a%sync()

    ! The loop spans the whole val array, i.e. size(a%val) entries.
    !$acc parallel loop present(a)
    do k = 1, size(a%val)
      a%val(k) = a%val(k) * d
    end do

    call a%set_dev()
    call psb_erractionrestore(err_act)
    return
  end subroutine psb_s_oacc_csr_scals

end submodule psb_s_oacc_csr_scals_impl

@ -0,0 +1,86 @@
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_vect_mv_impl
  use psb_base_mod
contains

  ! Sparse matrix-vector product  y = alpha * op(A) * x + beta * y  for an
  ! OpenACC CSR matrix, with encapsulated vectors.
  !
  !   alpha, beta  scalar coefficients
  !   a            matrix
  !   x, y         input and input/output vectors
  !   info         psb_success_, or psb_err_invalid_mat_state_ when x or y
  !                is shorter than the matrix dimensions require
  !   trans        optional 'N' (default), 'T' or 'C'
  !
  ! The device kernel is used only for the non-transposed product with both
  ! vectors of class psb_s_vect_oacc; every other case runs the parent
  ! class's spmm on the host arrays.
  module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
    implicit none
    real(psb_spk_), intent(in) :: alpha, beta
    class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
    class(psb_s_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans
    integer(psb_ipk_) :: m, n
    character :: trans_
    logical :: device_done, tra

    info = psb_success_
    m = a%get_nrows()
    n = a%get_ncols()
    if ((n > size(x%v)) .or. (m > size(y%v))) then
      write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
      info = psb_err_invalid_mat_state_
      return
    end if

    device_done = .false.
    if (present(trans)) then
      trans_ = trans
    else
      trans_ = 'N'
    end if
    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')

    if (.not.tra) then
      select type(xx => x)
      class is (psb_s_vect_oacc)
        select type (yy => y)
        class is (psb_s_vect_oacc)
          ! Synchronize anything that reports itself as host-side before
          ! launching the kernel.
          if (a%is_host()) call a%sync()
          if (xx%is_host()) call xx%sync()
          if (yy%is_host()) call yy%sync()
          call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
          call y%set_dev()
          device_done = .true.
        end select
      end select
    end if

    if (.not.device_done) then
      ! Host fallback: synchronize vectors that report themselves as
      ! device-side, then multiply on the host arrays.
      if (x%is_dev()) call x%sync()
      if (y%is_dev()) call y%sync()
      call a%psb_s_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
      call y%set_host()
    end if

  contains

    ! CSR kernel  y(1:m) = alpha * A * x + beta * y(1:m).
    ! Rows are handled in chunks of vsz: the outer loop over chunks is the
    ! parallel loop (vector_length vsz), the loop over the rows of a chunk
    ! is declared independent, and each row's dot product is accumulated
    ! sequentially in tmp.  n is accepted but not used.
    subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
      implicit none
      integer(psb_ipk_), intent(in) :: m, n
      real(psb_spk_), intent(in) :: alpha, beta
      real(psb_spk_), intent(in) :: val(:), x(:)
      real(psb_spk_), intent(inout) :: y(:)
      integer(psb_ipk_), intent(in) :: ja(:), irp(:)
      integer(psb_ipk_), intent(out) :: info
      integer(psb_ipk_) :: i, j, ii, isz
      real(psb_spk_) :: tmp
      integer(psb_ipk_), parameter :: vsz = 256

      info = 0
      !$acc parallel loop vector_length(vsz) private(isz)
      do ii = 1, m, vsz
        ! The last chunk may be shorter than vsz.
        isz = min(vsz, m - ii + 1)
        !$acc loop independent private(tmp)
        do i = ii, ii + isz - 1
          ! Single-precision zero, matching the kind of tmp.
          tmp = 0.0_psb_spk_
          !$acc loop seq
          do j = irp(i), irp(i + 1) - 1
            tmp = tmp + val(j) * x(ja(j))
          end do
          y(i) = alpha * tmp + beta * y(i)
        end do
      end do
    end subroutine inner_spmv

  end subroutine psb_s_oacc_csr_vect_mv

end submodule psb_s_oacc_csr_vect_mv_impl

@ -0,0 +1,35 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl
  use psb_base_mod
contains

  ! Allocate storage for an m x n OpenACC ELL matrix.
  !
  !   m, n   requested dimensions
  !   a      matrix being allocated
  !   nz     optional size request; 10 is used when it is absent
  !
  ! The parent ELL allocate does the host-side work; afterwards
  ! sync_dev_space() and set_host() are invoked, in that order.
  module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz)
    implicit none
    integer(psb_ipk_), intent(in) :: m, n
    class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
    integer(psb_ipk_), intent(in), optional :: nz
    integer(psb_ipk_) :: info
    integer(psb_ipk_) :: err_act, nz_
    character(len=20) :: name='allocate_mnnz'
    logical, parameter :: debug=.false.

    call psb_erractionsave(err_act)
    info = psb_success_

    ! Start from the fallback size and override it when the caller gave one.
    nz_ = 10
    if (present(nz)) nz_ = nz

    call a%psb_s_ell_sparse_mat%allocate(m, n, nz_)
    call a%sync_dev_space()
    call a%set_host()

    ! None of the calls above reports through info, so the success branch
    ! is the one taken.
    if (info == 0) then
      call psb_erractionrestore(err_act)
    else
      call psb_error_handler(err_act)
    end if
    return
  end subroutine psb_s_oacc_ell_allocate_mnnz

end submodule psb_s_oacc_ell_allocate_mnnz_impl

@ -0,0 +1,27 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl
  use psb_base_mod
contains

  ! Build the OpenACC ELL matrix a as a copy of the COO matrix b.
  !
  !   a      destination; its device space is released first
  !   b      source matrix, left unchanged
  !   info   psb_success_ on success; any failure of the parent conversion
  !          is reported as psb_err_alloc_dealloc_
  module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info)
    implicit none
    class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_s_coo_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_
    call a%free_dev_space()
    call a%psb_s_ell_sparse_mat%cp_from_coo(b, info)

    if (info == 0) then
      ! Conversion succeeded: recreate the device space and synchronize.
      call a%sync_dev_space()
      call a%set_host()
      call a%sync()
    else
      info = psb_err_alloc_dealloc_
    end if
    return
  end subroutine psb_s_oacc_ell_cp_from_coo

end submodule psb_s_oacc_ell_cp_from_coo_impl

@ -0,0 +1,26 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl
  use psb_base_mod
contains

  ! Build the OpenACC ELL matrix a as a copy of b, whatever b's storage
  ! format is.
  !
  !   a      destination matrix
  !   b      source matrix, left unchanged
  !   info   return code of the conversion that was actually performed
  !
  ! A COO source is delegated to cp_from_coo; any other format goes through
  ! the parent ELL cp_from_fmt followed by the device-side refresh.
  module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info)
    implicit none
    class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
    class(psb_s_base_sparse_mat), intent(in) :: b
    integer(psb_ipk_), intent(out) :: info

    info = psb_success_

    select type(b)
    type is (psb_s_coo_sparse_mat)
      call a%cp_from_coo(b, info)

    class default
      call a%free_dev_space()
      call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
      ! On failure info is handed back as is and the device is not touched.
      if (info == 0) then
        call a%sync_dev_space()
        call a%set_host()
        call a%sync()
      end if
    end select
  end subroutine psb_s_oacc_ell_cp_from_fmt

end submodule psb_s_oacc_ell_cp_from_fmt_impl

@ -0,0 +1,85 @@
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl
  use psb_base_mod
contains

  ! Triangular solve  y = alpha * op(A)^{-1} x + beta * y  for an OpenACC
  ! ELL matrix, with encapsulated vectors.
  !
  !   alpha, beta  scalar coefficients
  !   a            matrix; must be assembled, otherwise
  !                psb_err_invalid_mat_state_ is raised
  !   x, y         input and input/output vectors
  !   info         psb_success_, or psb_err_from_subroutine_ when the
  !                underlying solve fails
  !   trans        optional 'N' (default), 'T' or 'C'
  !
  ! The earlier device branch looped j = 1..a%nzt over the second dimension
  ! of a%val and overwrote y(i) at every j: it did not accumulate and did
  ! not perform a triangular solve.  There is no device solve kernel in
  ! this routine, so OpenACC vectors now take the host solve of the parent
  ! ELL class.
  ! NOTE(review): the host solve reads the host copy of a; confirm it is
  ! always current for matrices that reach this routine.
  module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
    implicit none
    class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
    real(psb_spk_), intent(in) :: alpha, beta
    class(psb_s_base_vect_type), intent(inout) :: x, y
    integer(psb_ipk_), intent(out) :: info
    character, optional, intent(in) :: trans
    real(psb_spk_), allocatable :: rx(:), ry(:)
    logical :: tra, both_oacc
    character :: trans_
    integer(psb_ipk_) :: err_act
    ! A len=20 variable would truncate this 24-character name.
    character(len=*), parameter :: name = 's_oacc_ell_inner_vect_sv'

    call psb_get_erraction(err_act)
    info = psb_success_

    if (present(trans)) then
      trans_ = trans
    else
      trans_ = 'N'
    end if

    if (.not.a%is_asb()) then
      info = psb_err_invalid_mat_state_
      call psb_errpush(info, name)
      goto 9999
    endif

    tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')

    ! Are both vectors OpenACC vectors?
    both_oacc = .false.
    select type (x)
    type is (psb_s_vect_oacc)
      select type (y)
      type is (psb_s_vect_oacc)
        both_oacc = .true.
      end select
    end select

    if (tra .or. (beta /= szero) .or. both_oacc) then
      ! Host solve on the encapsulated vectors: synchronize both, solve,
      ! then flag y as modified on the host.
      call x%sync()
      call y%sync()
      call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
      call y%set_host()
    else
      ! Other vector types: extract plain arrays, solve, rebuild y.
      rx = x%get_vect()
      ry = y%get_vect()
      call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
      call y%bld(ry)
    endif

    if (info /= psb_success_) then
      info = psb_err_from_subroutine_
      call psb_errpush(info, name, a_err = 'ell_vect_sv')
      goto 9999
    endif

    call psb_erractionrestore(err_act)
    return

9999 call psb_error_handler(err_act)
    return
  end subroutine psb_s_oacc_ell_inner_vect_sv

end submodule psb_s_oacc_ell_inner_vect_sv_impl

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save