Merge branch 'oacc_loloum' into repackage
commit
174a8e7aef
@ -0,0 +1,104 @@
|
||||
# AC_OPENACC
|
||||
# ---------
|
||||
# Check which options need to be passed to the C compiler to support Openacc.
|
||||
# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
|
||||
# options.
|
||||
# The options are necessary at compile time (so the #pragmas are understood)
|
||||
# and at link time (so the appropriate library is linked with).
|
||||
# This macro takes care to not produce redundant options if $CC $CFLAGS already
|
||||
# supports Openacc.
|
||||
#
|
||||
# For each candidate option, we do a compile test first, then a link test;
|
||||
# if the compile test succeeds but the link test fails, that means we have
|
||||
# found the correct option but it doesn't work because the libraries are
|
||||
# broken. (This can happen, for instance, with SunPRO C and a bad combination
|
||||
# of operating system patches.)
|
||||
#
|
||||
# Several of the options in our candidate list can be misinterpreted by
|
||||
# compilers that don't use them to activate Openacc support; for example,
|
||||
# many compilers understand "-openacc" to mean "write output to a file
|
||||
# named 'penmp'" rather than "enable Openacc". We can't completely avoid
|
||||
# the possibility of clobbering files named 'penmp' or 'mp' in configure's
|
||||
# working directory; therefore, this macro will bomb out if any such file
|
||||
# already exists when it's invoked.
|
||||
AC_DEFUN([AX_C_OPENACC],
|
||||
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
|
||||
[AC_ARG_ENABLE([openacc],
|
||||
[AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
|
||||
[
|
||||
OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
|
||||
if test "$enable_openacc" != no; then
|
||||
AC_LANG_PUSH([C])
|
||||
AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
|
||||
dnl Try these flags:
|
||||
dnl (on by default) ''
|
||||
dnl GCC >= 4.2 -fopenacc
|
||||
dnl SunPRO C -xopenacc
|
||||
dnl Intel C -openacc
|
||||
dnl SGI C, PGI C -mp
|
||||
dnl Tru64 Compaq C -omp
|
||||
dnl IBM XL C (AIX, Linux) -qsmp=omp
|
||||
dnl Cray CCE -homp
|
||||
dnl NEC SX -Popenacc
|
||||
dnl Lahey Fortran (Linux) --openacc
|
||||
for ac_option in '' -fopenacc -openacc -acc; do
|
||||
|
||||
ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
|
||||
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
|
||||
AC_COMPILE_IFELSE([
|
||||
#ifndef _OPENACC
|
||||
#error "OpenACC not supported"
|
||||
#endif
|
||||
#include <openacc.h>
|
||||
int main (void) { acc_init (0); return 0;}
|
||||
],
|
||||
[AC_LINK_IFELSE([
|
||||
#ifndef _OPENACC
|
||||
#error "OpenACC not supported"
|
||||
#endif
|
||||
#include <openacc.h>
|
||||
int main (void) { acc_init (0); return 0;}
|
||||
],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
|
||||
_AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
|
||||
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
|
||||
ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
|
||||
elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
|
||||
ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
|
||||
fi
|
||||
dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
|
||||
dnl started probing for Openacc support, so if they exist now, they were
|
||||
dnl created by the probe loop and it's safe to delete them.
|
||||
rm -f penmp mp])
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
|
||||
test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
|
||||
OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
|
||||
fi
|
||||
AC_LANG_POP([C])
|
||||
fi
|
||||
])
|
||||
|
||||
# _AC_OPENACC_SAFE_WD
|
||||
# ------------------
|
||||
# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
|
||||
# configure time for files that AC_OPENACC clobbers.
|
||||
AC_DEFUN([_AX_OPENACC_SAFE_WD],
|
||||
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
|
||||
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
|
||||
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
|
||||
To use AX_OPENACC you must not have either of these files
|
||||
at the top level of your source tree.]))])]dnl
|
||||
[if test -e penmp || test -e mp; then
|
||||
AC_MSG_ERROR(m4_normalize(
|
||||
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
|
||||
Aborting configure because one of these files already exists.]))
|
||||
fi])
|
||||
|
@ -0,0 +1,104 @@
|
||||
# AC_OPENACC
|
||||
# ---------
|
||||
# Check which options need to be passed to the C compiler to support Openacc.
|
||||
# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
|
||||
# options.
|
||||
# The options are necessary at compile time (so the #pragmas are understood)
|
||||
# and at link time (so the appropriate library is linked with).
|
||||
# This macro takes care to not produce redundant options if $CC $CFLAGS already
|
||||
# supports Openacc.
|
||||
#
|
||||
# For each candidate option, we do a compile test first, then a link test;
|
||||
# if the compile test succeeds but the link test fails, that means we have
|
||||
# found the correct option but it doesn't work because the libraries are
|
||||
# broken. (This can happen, for instance, with SunPRO C and a bad combination
|
||||
# of operating system patches.)
|
||||
#
|
||||
# Several of the options in our candidate list can be misinterpreted by
|
||||
# compilers that don't use them to activate Openacc support; for example,
|
||||
# many compilers understand "-openacc" to mean "write output to a file
|
||||
# named 'penmp'" rather than "enable Openacc". We can't completely avoid
|
||||
# the possibility of clobbering files named 'penmp' or 'mp' in configure's
|
||||
# working directory; therefore, this macro will bomb out if any such file
|
||||
# already exists when it's invoked.
|
||||
AC_DEFUN([AX_CXX_OPENACC],
|
||||
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
|
||||
[AC_ARG_ENABLE([openacc],
|
||||
[AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
|
||||
[
|
||||
OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
|
||||
if test "$enable_openacc" != no; then
|
||||
AC_LANG_PUSH([C++])
|
||||
AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
|
||||
dnl Try these flags:
|
||||
dnl (on by default) ''
|
||||
dnl GCC >= 4.2 -fopenacc
|
||||
dnl SunPRO C -xopenacc
|
||||
dnl Intel C -openacc
|
||||
dnl SGI C, PGI C -mp
|
||||
dnl Tru64 Compaq C -omp
|
||||
dnl IBM XL C (AIX, Linux) -qsmp=omp
|
||||
dnl Cray CCE -homp
|
||||
dnl NEC SX -Popenacc
|
||||
dnl Lahey Fortran (Linux) --openacc
|
||||
for ac_option in '' -fopenacc -openacc -acc; do
|
||||
|
||||
ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
|
||||
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
|
||||
AC_COMPILE_IFELSE([
|
||||
#ifndef _OPENACC
|
||||
#error "OpenACC not supported"
|
||||
#endif
|
||||
#include <openacc.h>
|
||||
int main (void) { acc_init (acc_get_device_type()); return 0;}
|
||||
],
|
||||
[AC_LINK_IFELSE([
|
||||
#ifndef _OPENACC
|
||||
#error "OpenACC not supported"
|
||||
#endif
|
||||
#include <openacc.h>
|
||||
int main (void) { acc_init (acc_get_device_type()); return 0;}
|
||||
],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
|
||||
_AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
|
||||
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
|
||||
ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
|
||||
elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
|
||||
ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
|
||||
fi
|
||||
dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
|
||||
dnl started probing for Openacc support, so if they exist now, they were
|
||||
dnl created by the probe loop and it's safe to delete them.
|
||||
rm -f penmp mp])
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
|
||||
test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
|
||||
OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
|
||||
fi
|
||||
AC_LANG_POP([C++])
|
||||
fi
|
||||
])
|
||||
|
||||
dnl _AC_OPENACC_SAFE_WD
|
||||
dnl ------------------
|
||||
dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
|
||||
dnl configure time for files that AC_OPENACC clobbers.
|
||||
dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
|
||||
dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
|
||||
dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
|
||||
dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'.
|
||||
dnl To use AX_OPENACC you must not have either of these files
|
||||
dnl at the top level of your source tree.]))])]dnl
|
||||
dnl [if test -e penmp || test -e mp; then
|
||||
dnl AC_MSG_ERROR(m4_normalize(
|
||||
dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
|
||||
dnl Aborting configure because one of these files already exists.]))
|
||||
dnl fi])
|
||||
|
@ -0,0 +1,108 @@
|
||||
# AC_OPENACC
|
||||
# ---------
|
||||
# Check which options need to be passed to the C compiler to support Openacc.
|
||||
# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
|
||||
# options.
|
||||
# The options are necessary at compile time (so the #pragmas are understood)
|
||||
# and at link time (so the appropriate library is linked with).
|
||||
# This macro takes care to not produce redundant options if $CC $CFLAGS already
|
||||
# supports Openacc.
|
||||
#
|
||||
# For each candidate option, we do a compile test first, then a link test;
|
||||
# if the compile test succeeds but the link test fails, that means we have
|
||||
# found the correct option but it doesn't work because the libraries are
|
||||
# broken. (This can happen, for instance, with SunPRO C and a bad combination
|
||||
# of operating system patches.)
|
||||
#
|
||||
# Several of the options in our candidate list can be misinterpreted by
|
||||
# compilers that don't use them to activate Openacc support; for example,
|
||||
# many compilers understand "-openacc" to mean "write output to a file
|
||||
# named 'penmp'" rather than "enable Openacc". We can't completely avoid
|
||||
# the possibility of clobbering files named 'penmp' or 'mp' in configure's
|
||||
# working directory; therefore, this macro will bomb out if any such file
|
||||
# already exists when it's invoked.
|
||||
AC_DEFUN([AX_FC_OPENACC],
|
||||
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
|
||||
[AC_ARG_ENABLE([openacc],
|
||||
[AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
|
||||
[
|
||||
OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
|
||||
if test "$enable_openacc" != no; then
|
||||
AC_LANG_PUSH([Fortran])
|
||||
AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
|
||||
dnl Try these flags:
|
||||
dnl (on by default) ''
|
||||
dnl GCC >= 4.2 -fopenacc
|
||||
dnl SunPRO C -xopenacc
|
||||
dnl Intel C -openacc
|
||||
dnl SGI C, PGI C -mp
|
||||
dnl Tru64 Compaq C -omp
|
||||
dnl IBM XL C (AIX, Linux) -qsmp=omp
|
||||
dnl Cray CCE -homp
|
||||
dnl NEC SX -Popenacc
|
||||
dnl Lahey Fortran (Linux) --openacc
|
||||
for ac_option in '' -fopenacc -openacc -acc; do
|
||||
|
||||
ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
|
||||
_AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
|
||||
AC_COMPILE_IFELSE([
|
||||
program main
|
||||
use openacc
|
||||
implicit none
|
||||
integer tid, np
|
||||
tid = 42
|
||||
call acc_init(0)
|
||||
end
|
||||
],
|
||||
[AC_LINK_IFELSE([
|
||||
program main
|
||||
use openacc
|
||||
implicit none
|
||||
integer tid, np
|
||||
tid = 42
|
||||
call acc_init(0)
|
||||
end
|
||||
],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
|
||||
[ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
|
||||
_AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
|
||||
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported'; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
|
||||
ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
|
||||
elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
|
||||
ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
|
||||
fi
|
||||
dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
|
||||
dnl started probing for Openacc support, so if they exist now, they were
|
||||
dnl created by the probe loop and it's safe to delete them.
|
||||
rm -f penmp mp])
|
||||
if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
|
||||
test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
|
||||
OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
|
||||
fi
|
||||
AC_LANG_POP([Fortran])
|
||||
fi
|
||||
])
|
||||
|
||||
# _AC_OPENACC_SAFE_WD
|
||||
# ------------------
|
||||
# AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
|
||||
# configure time for files that AC_OPENACC clobbers.
|
||||
AC_DEFUN([_AX_OPENACC_SAFE_WD],
|
||||
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
|
||||
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
|
||||
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
|
||||
To use AX_OPENACC you must not have either of these files
|
||||
at the top level of your source tree.]))])]dnl
|
||||
[if test -e penmp || test -e mp; then
|
||||
AC_MSG_ERROR(m4_normalize(
|
||||
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
|
||||
Aborting configure because one of these files already exists.]))
|
||||
fi])
|
||||
|
@ -0,0 +1,89 @@
|
||||
include ../Make.inc
|
||||
|
||||
|
||||
#CC=mpicc
|
||||
#FC=mpif90
|
||||
#FCOPT=-O0 -march=native
|
||||
#OFFLOAD=-fopenacc -foffload=nvptx-none="-march=sm_70"
|
||||
|
||||
LIBDIR=../lib
|
||||
INCDIR=../include
|
||||
MODDIR=../modules
|
||||
IMPLDIR=./impl
|
||||
|
||||
INCLUDES=-I$(LIBDIR) -I$(INCDIR) -I$(MODDIR)
|
||||
FINCLUDES=$(FMFLAG). $(FMFLAG)$(INCDIR) $(FMFLAG)$(MODDIR) $(FIFLAG).
|
||||
CINCLUDES=
|
||||
#LIBS=-L$(LIBDIR) -lpsb_util -lpsb_ext -lpsb_base -lopenblas -lmetis
|
||||
|
||||
|
||||
FOBJS= psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
|
||||
psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
|
||||
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
|
||||
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
|
||||
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
|
||||
psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
|
||||
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
|
||||
psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
|
||||
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
|
||||
psb_oacc_mod.o psb_oacc_env_mod.o
|
||||
|
||||
|
||||
LIBNAME=libpsb_openacc.a
|
||||
|
||||
OBJS=$(COBJS) $(FOBJS)
|
||||
|
||||
|
||||
lib: objs ilib
|
||||
ar cur $(LIBNAME) $(OBJS)
|
||||
/bin/cp -p $(LIBNAME) $(LIBDIR)
|
||||
|
||||
objs: $(OBJS) iobjs
|
||||
/bin/cp -p *$(.mod) $(MODDIR)
|
||||
|
||||
iobjs: $(OBJS)
|
||||
$(MAKE) -C impl objs
|
||||
|
||||
ilib: $(OBJS)
|
||||
$(MAKE) -C impl lib
|
||||
|
||||
psb_oacc_mod.o : psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o \
|
||||
psb_s_oacc_vect_mod.o psb_s_oacc_csr_mat_mod.o \
|
||||
psb_d_oacc_vect_mod.o psb_d_oacc_csr_mat_mod.o \
|
||||
psb_c_oacc_vect_mod.o psb_c_oacc_csr_mat_mod.o \
|
||||
psb_z_oacc_vect_mod.o psb_z_oacc_csr_mat_mod.o \
|
||||
psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o \
|
||||
psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o \
|
||||
psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o \
|
||||
psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o \
|
||||
psb_oacc_env_mod.o
|
||||
|
||||
psb_s_oacc_vect_mod.o psb_d_oacc_vect_mod.o \
|
||||
psb_c_oacc_vect_mod.o psb_z_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_l_oacc_vect_mod.o psb_oacc_env_mod.o
|
||||
psb_l_oacc_vect_mod.o: psb_i_oacc_vect_mod.o psb_oacc_env_mod.o
|
||||
psb_i_oacc_vect_mod.o: psb_oacc_env_mod.o
|
||||
|
||||
|
||||
psb_s_oacc_csr_mat_mod.o psb_s_oacc_ell_mat_mod.o psb_s_oacc_hll_mat_mod.o: psb_s_oacc_vect_mod.o
|
||||
psb_d_oacc_csr_mat_mod.o psb_d_oacc_ell_mat_mod.o psb_d_oacc_hll_mat_mod.o: psb_d_oacc_vect_mod.o
|
||||
psb_c_oacc_csr_mat_mod.o psb_c_oacc_ell_mat_mod.o psb_c_oacc_hll_mat_mod.o: psb_c_oacc_vect_mod.o
|
||||
psb_z_oacc_csr_mat_mod.o psb_z_oacc_ell_mat_mod.o psb_z_oacc_hll_mat_mod.o: psb_z_oacc_vect_mod.o
|
||||
|
||||
|
||||
|
||||
clean: cclean iclean
|
||||
/bin/rm -f $(FOBJS) *$(.mod) *.a *.smod
|
||||
veryclean: clean
|
||||
cclean:
|
||||
/bin/rm -f $(COBJS)
|
||||
iclean:
|
||||
$(MAKE) -C impl clean
|
||||
|
||||
.c.o:
|
||||
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
|
||||
.f90.o:
|
||||
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
|
||||
.F90.o:
|
||||
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
|
||||
.cpp.o:
|
||||
$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
|
@ -0,0 +1,285 @@
|
||||
include ../../Make.inc
|
||||
LIBDIR=../../lib
|
||||
INCDIR=../../include
|
||||
MODDIR=../../modules
|
||||
UP=..
|
||||
#
|
||||
# Compilers and such
|
||||
#
|
||||
#CCOPT= -g
|
||||
FINCLUDES=$(FMFLAG).. $(FMFLAG)$(MODDIR) $(FMFLAG)$(INCDIR) $(FIFLAG)..
|
||||
LIBNAME=libpsb_openacc.a
|
||||
|
||||
OBJS= \
|
||||
psb_s_oacc_csr_vect_mv.o \
|
||||
psb_s_oacc_csr_inner_vect_sv.o \
|
||||
psb_s_oacc_csr_scals.o \
|
||||
psb_s_oacc_csr_scal.o \
|
||||
psb_s_oacc_csr_allocate_mnnz.o \
|
||||
psb_s_oacc_csr_reallocate_nz.o \
|
||||
psb_s_oacc_csr_cp_from_coo.o \
|
||||
psb_s_oacc_csr_cp_from_fmt.o \
|
||||
psb_s_oacc_csr_mv_from_coo.o \
|
||||
psb_s_oacc_csr_mv_from_fmt.o \
|
||||
psb_s_oacc_csr_mold.o \
|
||||
psb_s_oacc_mlt_v_2.o \
|
||||
psb_s_oacc_mlt_v.o \
|
||||
psb_d_oacc_csr_vect_mv.o \
|
||||
psb_d_oacc_csr_inner_vect_sv.o \
|
||||
psb_d_oacc_csr_scals.o \
|
||||
psb_d_oacc_csr_scal.o \
|
||||
psb_d_oacc_csr_allocate_mnnz.o \
|
||||
psb_d_oacc_csr_reallocate_nz.o \
|
||||
psb_d_oacc_csr_cp_from_coo.o \
|
||||
psb_d_oacc_csr_cp_from_fmt.o \
|
||||
psb_d_oacc_csr_mv_from_coo.o \
|
||||
psb_d_oacc_csr_mv_from_fmt.o \
|
||||
psb_d_oacc_csr_mold.o \
|
||||
psb_d_oacc_mlt_v_2.o \
|
||||
psb_d_oacc_mlt_v.o \
|
||||
psb_c_oacc_csr_vect_mv.o \
|
||||
psb_c_oacc_csr_inner_vect_sv.o \
|
||||
psb_c_oacc_csr_scals.o \
|
||||
psb_c_oacc_csr_scal.o \
|
||||
psb_c_oacc_csr_allocate_mnnz.o \
|
||||
psb_c_oacc_csr_reallocate_nz.o \
|
||||
psb_c_oacc_csr_cp_from_coo.o \
|
||||
psb_c_oacc_csr_cp_from_fmt.o \
|
||||
psb_c_oacc_csr_mv_from_coo.o \
|
||||
psb_c_oacc_csr_mv_from_fmt.o \
|
||||
psb_c_oacc_csr_mold.o \
|
||||
psb_c_oacc_mlt_v_2.o \
|
||||
psb_c_oacc_mlt_v.o \
|
||||
psb_z_oacc_csr_vect_mv.o \
|
||||
psb_z_oacc_csr_inner_vect_sv.o \
|
||||
psb_z_oacc_csr_scals.o \
|
||||
psb_z_oacc_csr_scal.o \
|
||||
psb_z_oacc_csr_allocate_mnnz.o \
|
||||
psb_z_oacc_csr_reallocate_nz.o \
|
||||
psb_z_oacc_csr_cp_from_coo.o \
|
||||
psb_z_oacc_csr_cp_from_fmt.o \
|
||||
psb_z_oacc_csr_mv_from_coo.o \
|
||||
psb_z_oacc_csr_mv_from_fmt.o \
|
||||
psb_z_oacc_csr_mold.o \
|
||||
psb_z_oacc_mlt_v_2.o \
|
||||
psb_z_oacc_mlt_v.o \
|
||||
psb_s_oacc_ell_vect_mv.o \
|
||||
psb_s_oacc_ell_inner_vect_sv.o \
|
||||
psb_s_oacc_ell_scals.o \
|
||||
psb_s_oacc_ell_scal.o \
|
||||
psb_s_oacc_ell_reallocate_nz.o \
|
||||
psb_s_oacc_ell_allocate_mnnz.o \
|
||||
psb_s_oacc_ell_cp_from_coo.o \
|
||||
psb_s_oacc_ell_cp_from_fmt.o \
|
||||
psb_s_oacc_ell_mv_from_coo.o \
|
||||
psb_s_oacc_ell_mv_from_fmt.o \
|
||||
psb_s_oacc_ell_mold.o \
|
||||
psb_s_oacc_hll_mold.o \
|
||||
psb_s_oacc_hll_mv_from_fmt.o \
|
||||
psb_s_oacc_hll_mv_from_coo.o \
|
||||
psb_s_oacc_hll_cp_from_fmt.o \
|
||||
psb_s_oacc_hll_cp_from_coo.o \
|
||||
psb_s_oacc_hll_allocate_mnnz.o \
|
||||
psb_s_oacc_hll_reallocate_nz.o \
|
||||
psb_s_oacc_hll_scal.o \
|
||||
psb_s_oacc_hll_scals.o \
|
||||
psb_s_oacc_hll_inner_vect_sv.o \
|
||||
psb_s_oacc_hll_vect_mv.o \
|
||||
psb_d_oacc_ell_vect_mv.o \
|
||||
psb_d_oacc_ell_inner_vect_sv.o \
|
||||
psb_d_oacc_ell_scals.o \
|
||||
psb_d_oacc_ell_scal.o \
|
||||
psb_d_oacc_ell_reallocate_nz.o \
|
||||
psb_d_oacc_ell_allocate_mnnz.o \
|
||||
psb_d_oacc_ell_cp_from_coo.o \
|
||||
psb_d_oacc_ell_cp_from_fmt.o \
|
||||
psb_d_oacc_ell_mv_from_coo.o \
|
||||
psb_d_oacc_ell_mv_from_fmt.o \
|
||||
psb_d_oacc_ell_mold.o \
|
||||
psb_d_oacc_hll_mold.o \
|
||||
psb_d_oacc_hll_mv_from_fmt.o \
|
||||
psb_d_oacc_hll_mv_from_coo.o \
|
||||
psb_d_oacc_hll_cp_from_fmt.o \
|
||||
psb_d_oacc_hll_cp_from_coo.o \
|
||||
psb_d_oacc_hll_allocate_mnnz.o \
|
||||
psb_d_oacc_hll_reallocate_nz.o \
|
||||
psb_d_oacc_hll_scal.o \
|
||||
psb_d_oacc_hll_scals.o \
|
||||
psb_d_oacc_hll_inner_vect_sv.o \
|
||||
psb_d_oacc_hll_vect_mv.o \
|
||||
psb_c_oacc_ell_vect_mv.o \
|
||||
psb_c_oacc_ell_inner_vect_sv.o \
|
||||
psb_c_oacc_ell_scals.o \
|
||||
psb_c_oacc_ell_scal.o \
|
||||
psb_c_oacc_ell_reallocate_nz.o \
|
||||
psb_c_oacc_ell_allocate_mnnz.o \
|
||||
psb_c_oacc_ell_cp_from_coo.o \
|
||||
psb_c_oacc_ell_cp_from_fmt.o \
|
||||
psb_c_oacc_ell_mv_from_coo.o \
|
||||
psb_c_oacc_ell_mv_from_fmt.o \
|
||||
psb_c_oacc_ell_mold.o \
|
||||
psb_c_oacc_hll_mold.o \
|
||||
psb_c_oacc_hll_mv_from_fmt.o \
|
||||
psb_c_oacc_hll_mv_from_coo.o \
|
||||
psb_c_oacc_hll_cp_from_fmt.o \
|
||||
psb_c_oacc_hll_cp_from_coo.o \
|
||||
psb_c_oacc_hll_allocate_mnnz.o \
|
||||
psb_c_oacc_hll_reallocate_nz.o \
|
||||
psb_c_oacc_hll_scal.o \
|
||||
psb_c_oacc_hll_scals.o \
|
||||
psb_c_oacc_hll_inner_vect_sv.o \
|
||||
psb_c_oacc_hll_vect_mv.o \
|
||||
psb_z_oacc_ell_vect_mv.o \
|
||||
psb_z_oacc_ell_inner_vect_sv.o \
|
||||
psb_z_oacc_ell_scals.o \
|
||||
psb_z_oacc_ell_scal.o \
|
||||
psb_z_oacc_ell_reallocate_nz.o \
|
||||
psb_z_oacc_ell_allocate_mnnz.o \
|
||||
psb_z_oacc_ell_cp_from_coo.o \
|
||||
psb_z_oacc_ell_cp_from_fmt.o \
|
||||
psb_z_oacc_ell_mv_from_coo.o \
|
||||
psb_z_oacc_ell_mv_from_fmt.o \
|
||||
psb_z_oacc_ell_mold.o \
|
||||
psb_z_oacc_hll_mold.o \
|
||||
psb_z_oacc_hll_mv_from_fmt.o \
|
||||
psb_z_oacc_hll_mv_from_coo.o \
|
||||
psb_z_oacc_hll_cp_from_fmt.o \
|
||||
psb_z_oacc_hll_cp_from_coo.o \
|
||||
psb_z_oacc_hll_allocate_mnnz.o \
|
||||
psb_z_oacc_hll_reallocate_nz.o \
|
||||
psb_z_oacc_hll_scal.o \
|
||||
psb_z_oacc_hll_scals.o \
|
||||
psb_z_oacc_hll_inner_vect_sv.o \
|
||||
psb_z_oacc_hll_vect_mv.o \
|
||||
psb_z_oacc_ell_vect_mv.o \
|
||||
psb_z_oacc_ell_inner_vect_sv.o \
|
||||
psb_z_oacc_ell_scals.o \
|
||||
psb_z_oacc_ell_scal.o \
|
||||
psb_z_oacc_ell_reallocate_nz.o \
|
||||
psb_z_oacc_ell_allocate_mnnz.o \
|
||||
psb_z_oacc_ell_cp_from_coo.o \
|
||||
psb_z_oacc_ell_cp_from_fmt.o \
|
||||
psb_z_oacc_ell_mv_from_coo.o \
|
||||
psb_z_oacc_ell_mv_from_fmt.o \
|
||||
psb_z_oacc_ell_mold.o \
|
||||
psb_z_oacc_hll_mold.o \
|
||||
psb_z_oacc_hll_mv_from_fmt.o \
|
||||
psb_z_oacc_hll_mv_from_coo.o \
|
||||
psb_z_oacc_hll_cp_from_fmt.o \
|
||||
psb_z_oacc_hll_cp_from_coo.o \
|
||||
psb_z_oacc_hll_allocate_mnnz.o \
|
||||
psb_z_oacc_hll_reallocate_nz.o \
|
||||
psb_z_oacc_hll_scal.o \
|
||||
psb_z_oacc_hll_scals.o \
|
||||
psb_z_oacc_hll_inner_vect_sv.o \
|
||||
psb_z_oacc_hll_vect_mv.o
|
||||
|
||||
|
||||
objs: $(OBJS)
|
||||
|
||||
lib: objs
|
||||
ar cur ../$(LIBNAME) $(OBJS)
|
||||
|
||||
psb_s_oacc_csr_vect_mv.o psb_s_oacc_csr_inner_vect_sv.o \
|
||||
psb_s_oacc_csr_scals.o \
|
||||
psb_s_oacc_csr_scal.o psb_s_oacc_csr_allocate_mnnz.o \
|
||||
psb_s_oacc_csr_reallocate_nz.o psb_s_oacc_csr_cp_from_coo.o \
|
||||
psb_s_oacc_csr_cp_from_fmt.o psb_s_oacc_csr_mv_from_coo.o \
|
||||
psb_s_oacc_csr_mv_from_fmt.o psb_s_oacc_csr_mold.o: $(UP)/psb_s_oacc_csr_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
|
||||
|
||||
psb_s_oacc_ell_vect_mv.o psb_s_oacc_ell_inner_vect_sv.o \
|
||||
psb_s_oacc_ell_scals.o \
|
||||
psb_s_oacc_ell_scal.o psb_s_oacc_ell_allocate_mnnz.o \
|
||||
psb_s_oacc_ell_reallocate_nz.o psb_s_oacc_ell_cp_from_coo.o \
|
||||
psb_s_oacc_ell_cp_from_fmt.o psb_s_oacc_ell_mv_from_coo.o \
|
||||
psb_s_oacc_ell_mv_from_fmt.o psb_s_oacc_ell_mold.o: $(UP)/psb_s_oacc_ell_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
|
||||
|
||||
psb_s_oacc_hll_vect_mv.o psb_s_oacc_hll_inner_vect_sv.o \
|
||||
psb_s_oacc_hll_scals.o \
|
||||
psb_s_oacc_hll_scal.o psb_s_oacc_hll_allocate_mnnz.o \
|
||||
psb_s_oacc_hll_reallocate_nz.o psb_s_oacc_hll_cp_from_coo.o \
|
||||
psb_s_oacc_hll_cp_from_fmt.o psb_s_oacc_hll_mv_from_coo.o \
|
||||
psb_s_oacc_hll_mv_from_fmt.o psb_s_oacc_hll_mold.o: $(UP)/psb_s_oacc_hll_mat_mod.o $(UP)/psb_s_oacc_vect_mod.o
|
||||
|
||||
|
||||
psb_d_oacc_csr_vect_mv.o psb_d_oacc_csr_inner_vect_sv.o \
|
||||
psb_d_oacc_csr_scals.o \
|
||||
psb_d_oacc_csr_scal.o psb_d_oacc_csr_allocate_mnnz.o \
|
||||
psb_d_oacc_csr_reallocate_nz.o psb_d_oacc_csr_cp_from_coo.o \
|
||||
psb_d_oacc_csr_cp_from_fmt.o psb_d_oacc_csr_mv_from_coo.o \
|
||||
psb_d_oacc_csr_mv_from_fmt.o psb_d_oacc_csr_mold.o: $(UP)/psb_d_oacc_csr_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
|
||||
|
||||
psb_d_oacc_ell_vect_mv.o psb_d_oacc_ell_inner_vect_sv.o \
|
||||
psb_d_oacc_ell_scals.o \
|
||||
psb_d_oacc_ell_scal.o psb_d_oacc_ell_allocate_mnnz.o \
|
||||
psb_d_oacc_ell_reallocate_nz.o psb_d_oacc_ell_cp_from_coo.o \
|
||||
psb_d_oacc_ell_cp_from_fmt.o psb_d_oacc_ell_mv_from_coo.o \
|
||||
psb_d_oacc_ell_mv_from_fmt.o psb_d_oacc_ell_mold.o: $(UP)/psb_d_oacc_ell_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
|
||||
|
||||
psb_d_oacc_hll_vect_mv.o psb_d_oacc_hll_inner_vect_sv.o \
|
||||
psb_d_oacc_hll_scals.o \
|
||||
psb_d_oacc_hll_scal.o psb_d_oacc_hll_allocate_mnnz.o \
|
||||
psb_d_oacc_hll_reallocate_nz.o psb_d_oacc_hll_cp_from_coo.o \
|
||||
psb_d_oacc_hll_cp_from_fmt.o psb_d_oacc_hll_mv_from_coo.o \
|
||||
psb_d_oacc_hll_mv_from_fmt.o psb_d_oacc_hll_mold.o: $(UP)/psb_d_oacc_hll_mat_mod.o $(UP)/psb_d_oacc_vect_mod.o
|
||||
|
||||
psb_c_oacc_csr_vect_mv.o psb_c_oacc_csr_inner_vect_sv.o \
|
||||
psb_c_oacc_csr_scals.o \
|
||||
psb_c_oacc_csr_scal.o psb_c_oacc_csr_allocate_mnnz.o \
|
||||
psb_c_oacc_csr_reallocate_nz.o psb_c_oacc_csr_cp_from_coo.o \
|
||||
psb_c_oacc_csr_cp_from_fmt.o psb_c_oacc_csr_mv_from_coo.o \
|
||||
psb_c_oacc_csr_mv_from_fmt.o psb_c_oacc_csr_mold.o: $(UP)/psb_c_oacc_csr_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
|
||||
|
||||
psb_c_oacc_ell_vect_mv.o psb_c_oacc_ell_inner_vect_sv.o \
|
||||
psb_c_oacc_ell_scals.o \
|
||||
psb_c_oacc_ell_scal.o psb_c_oacc_ell_allocate_mnnz.o \
|
||||
psb_c_oacc_ell_reallocate_nz.o psb_c_oacc_ell_cp_from_coo.o \
|
||||
psb_c_oacc_ell_cp_from_fmt.o psb_c_oacc_ell_mv_from_coo.o \
|
||||
psb_c_oacc_ell_mv_from_fmt.o psb_c_oacc_ell_mold.o: $(UP)/psb_c_oacc_ell_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
|
||||
|
||||
psb_c_oacc_hll_vect_mv.o psb_c_oacc_hll_inner_vect_sv.o \
|
||||
psb_c_oacc_hll_scals.o \
|
||||
psb_c_oacc_hll_scal.o psb_c_oacc_hll_allocate_mnnz.o \
|
||||
psb_c_oacc_hll_reallocate_nz.o psb_c_oacc_hll_cp_from_coo.o \
|
||||
psb_c_oacc_hll_cp_from_fmt.o psb_c_oacc_hll_mv_from_coo.o \
|
||||
psb_c_oacc_hll_mv_from_fmt.o psb_c_oacc_hll_mold.o: $(UP)/psb_c_oacc_hll_mat_mod.o $(UP)/psb_c_oacc_vect_mod.o
|
||||
|
||||
|
||||
psb_z_oacc_csr_vect_mv.o psb_z_oacc_csr_inner_vect_sv.o \
|
||||
psb_z_oacc_csr_scals.o \
|
||||
psb_z_oacc_csr_scal.o psb_z_oacc_csr_allocate_mnnz.o \
|
||||
psb_z_oacc_csr_reallocate_nz.o psb_z_oacc_csr_cp_from_coo.o \
|
||||
psb_z_oacc_csr_cp_from_fmt.o psb_z_oacc_csr_mv_from_coo.o \
|
||||
psb_z_oacc_csr_mv_from_fmt.o psb_z_oacc_csr_mold.o: $(UP)/psb_z_oacc_csr_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
|
||||
|
||||
psb_z_oacc_ell_vect_mv.o psb_z_oacc_ell_inner_vect_sv.o \
|
||||
psb_z_oacc_ell_scals.o \
|
||||
psb_z_oacc_ell_scal.o psb_z_oacc_ell_allocate_mnnz.o \
|
||||
psb_z_oacc_ell_reallocate_nz.o psb_z_oacc_ell_cp_from_coo.o \
|
||||
psb_z_oacc_ell_cp_from_fmt.o psb_z_oacc_ell_mv_from_coo.o \
|
||||
psb_z_oacc_ell_mv_from_fmt.o psb_z_oacc_ell_mold.o: $(UP)/psb_z_oacc_ell_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
|
||||
|
||||
psb_z_oacc_hll_vect_mv.o psb_z_oacc_hll_inner_vect_sv.o \
|
||||
psb_z_oacc_hll_scals.o \
|
||||
psb_z_oacc_hll_scal.o psb_z_oacc_hll_allocate_mnnz.o \
|
||||
psb_z_oacc_hll_reallocate_nz.o psb_z_oacc_hll_cp_from_coo.o \
|
||||
psb_z_oacc_hll_cp_from_fmt.o psb_z_oacc_hll_mv_from_coo.o \
|
||||
psb_z_oacc_hll_mv_from_fmt.o psb_z_oacc_hll_mold.o: $(UP)/psb_z_oacc_hll_mat_mod.o $(UP)/psb_z_oacc_vect_mod.o
|
||||
|
||||
psb_s_oacc_mlt_v_2.o psb_s_oacc_mlt_v.o: $(UP)/psb_s_oacc_vect_mod.o
|
||||
psb_d_oacc_mlt_v_2.o psb_d_oacc_mlt_v.o: $(UP)/psb_d_oacc_vect_mod.o
|
||||
psb_c_oacc_mlt_v_2.o psb_c_oacc_mlt_v.o: $(UP)/psb_c_oacc_vect_mod.o
|
||||
psb_z_oacc_mlt_v_2.o psb_z_oacc_mlt_v.o: $(UP)/psb_z_oacc_vect_mod.o
|
||||
|
||||
|
||||
clean:
|
||||
/bin/rm -f $(OBJS) *.smod
|
||||
|
||||
.c.o:
|
||||
$(CC) $(CCOPT) $(CCOPENACC) $(CINCLUDES) $(CDEFINES) -c $< -o $@
|
||||
.f90.o:
|
||||
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) -c $< -o $@
|
||||
.F90.o:
|
||||
$(FC) $(FCOPT) $(FCOPENACC) $(FINCLUDES) $(FDEFINES) -c $< -o $@
|
||||
.cpp.o:
|
||||
$(CXX) $(CXXOPT) $(CXXOPENACC) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
|
@ -0,0 +1,29 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_c_csr_sparse_mat%allocate(m, n, nz)
|
||||
call a%set_host()
|
||||
call a%sync_dev_space()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_allocate_mnnz
|
||||
end submodule psb_c_oacc_csr_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_csr_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_cp_from_coo
|
||||
end submodule psb_c_oacc_csr_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_c_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_csr_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_c_oacc_csr_cp_from_fmt
|
||||
end submodule psb_c_oacc_csr_cp_from_fmt_impl
|
@ -0,0 +1,83 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'c_oacc_csr_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_c_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_c_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_c_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, size(a%val)
|
||||
yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_c_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_c_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'csrg_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_c_oacc_csr_inner_vect_sv
|
||||
end submodule psb_c_oacc_csr_inner_vect_sv_impl
|
||||
|
@ -0,0 +1,35 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='csr_mold'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_c_oacc_csr_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_mold
|
||||
end submodule psb_c_oacc_csr_mold_impl
|
||||
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_csr_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_mv_from_coo
|
||||
end submodule psb_c_oacc_csr_mv_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_c_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_csr_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_c_oacc_csr_mv_from_fmt
|
||||
end submodule psb_c_oacc_csr_mv_from_fmt_impl
|
@ -0,0 +1,28 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='c_oacc_csr_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_c_csr_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_reallocate_nz
|
||||
end submodule psb_c_oacc_csr_reallocate_nz_impl
|
@ -0,0 +1,53 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
complex(psb_spk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, a%get_nrows()
|
||||
do j = a%irp(i), a%irp(i+1) - 1
|
||||
a%val(j) = a%val(j) * d(i)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, a%get_ncols()
|
||||
do j = a%irp(i), a%irp(i+1) - 1
|
||||
a%val(j) = a%val(j) * d(a%ja(j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, size(a%val)
|
||||
a%val(i) = a%val(i) * d(i)
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_scal
|
||||
end submodule psb_c_oacc_csr_scal_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
complex(psb_spk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
!$acc parallel loop present(a)
|
||||
do i = 1, size(a%val)
|
||||
a%val(i) = a%val(i) * d
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_csr_scals
|
||||
end submodule psb_c_oacc_csr_scals_impl
|
@ -0,0 +1,86 @@
|
||||
submodule (psb_c_oacc_csr_mat_mod) psb_c_oacc_csr_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_c_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_c_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_c_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, n
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
complex(psb_spk_) :: val(:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:), irp(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, ii, isz
|
||||
complex(psb_spk_) :: tmp
|
||||
integer(psb_ipk_), parameter :: vsz = 256
|
||||
|
||||
info = 0
|
||||
|
||||
!$acc parallel loop vector_length(vsz) private(isz)
|
||||
do ii = 1, m, vsz
|
||||
isz = min(vsz, m - ii + 1)
|
||||
!$acc loop independent private(tmp)
|
||||
do i = ii, ii + isz - 1
|
||||
tmp = 0.0_psb_dpk_
|
||||
!$acc loop seq
|
||||
do j = irp(i), irp(i + 1) - 1
|
||||
tmp = tmp + val(j) * x(ja(j))
|
||||
end do
|
||||
y(i) = alpha * tmp + beta * y(i)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
|
||||
end subroutine psb_c_oacc_csr_vect_mv
|
||||
end submodule psb_c_oacc_csr_vect_mv_impl
|
@ -0,0 +1,35 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnnz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(nz)) then
|
||||
nz_ = nz
|
||||
else
|
||||
nz_ = 10
|
||||
end if
|
||||
|
||||
call a%psb_c_ell_sparse_mat%allocate(m, n, nz_)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_allocate_mnnz
|
||||
end submodule psb_c_oacc_ell_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
|
||||
info = psb_success_
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_ell_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_cp_from_coo
|
||||
end submodule psb_c_oacc_ell_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_c_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_ell_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_c_oacc_ell_cp_from_fmt
|
||||
end submodule psb_c_oacc_ell_cp_from_fmt_impl
|
@ -0,0 +1,85 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'c_oacc_ell_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i, j, nzt
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_c_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_c_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
nzt = a%nzt
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, size(a%val, 1)
|
||||
do j = 1, nzt
|
||||
yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
|
||||
end do
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_c_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'ell_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_c_oacc_ell_inner_vect_sv
|
||||
end submodule psb_c_oacc_ell_inner_vect_sv_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
|
||||
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'ell_mold'
|
||||
logical, parameter :: debug = .false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_c_oacc_ell_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_mold
|
||||
end submodule psb_c_oacc_ell_mold_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_ell_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_mv_from_coo
|
||||
end submodule psb_c_oacc_ell_mv_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_c_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_ell_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_c_oacc_ell_mv_from_fmt
|
||||
end submodule psb_c_oacc_ell_mv_from_fmt_impl
|
@ -0,0 +1,28 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='c_oacc_ell_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_c_ell_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_reallocate_nz
|
||||
end submodule psb_c_oacc_ell_reallocate_nz_impl
|
@ -0,0 +1,58 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
complex(psb_spk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j, m, nzt
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
m = a%get_nrows()
|
||||
nzt = a%nzt
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
!$acc parallel loop collapse(2) present(a, d)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d(i)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
!$acc parallel loop collapse(2) present(a, d)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
!$acc parallel loop collapse(2) present(a, d)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d(j)
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_scal
|
||||
end submodule psb_c_oacc_ell_scal_impl
|
@ -0,0 +1,39 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
complex(psb_spk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j, nzt, m
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
m = a%get_nrows()
|
||||
nzt = a%nzt
|
||||
|
||||
!$acc parallel loop collapse(2) present(a)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d
|
||||
end do
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_ell_scals
|
||||
end submodule psb_c_oacc_ell_scals_impl
|
@ -0,0 +1,90 @@
|
||||
submodule (psb_c_oacc_ell_mat_mod) psb_c_oacc_ell_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_oacc_ell_sparse_mat), intent(in) :: a
|
||||
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n, nzt, nc
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
nzt = a%nzt
|
||||
nc = size(a%ja,2)
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_c_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_c_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_c_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, n, nc
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
complex(psb_spk_) :: val(:,:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:,:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, ii, isz
|
||||
complex(psb_spk_) :: tmp
|
||||
integer(psb_ipk_), parameter :: vsz = 256
|
||||
|
||||
info = 0
|
||||
|
||||
!$acc parallel loop vector_length(vsz) private(isz)
|
||||
do ii = 1, m, vsz
|
||||
isz = min(vsz, m - ii + 1)
|
||||
!$acc loop independent private(tmp)
|
||||
do i = ii, ii + isz - 1
|
||||
tmp = 0.0_psb_dpk_
|
||||
!$acc loop seq
|
||||
do j = 1, nc
|
||||
if (ja(i,j) > 0) then
|
||||
tmp = tmp + val(i,j) * x(ja(i,j))
|
||||
end if
|
||||
end do
|
||||
y(i) = alpha * tmp + beta * y(i)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
|
||||
end subroutine psb_c_oacc_ell_vect_mv
|
||||
end submodule psb_c_oacc_ell_vect_mv_impl
|
@ -0,0 +1,36 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnnz'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: hksz, nhacks
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(nz)) then
|
||||
nz_ = nz
|
||||
else
|
||||
nz_ = 10
|
||||
end if
|
||||
|
||||
call a%psb_c_hll_sparse_mat%allocate(m, n, nz_)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_allocate_mnnz
|
||||
end submodule psb_c_oacc_hll_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_hll_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_cp_from_coo
|
||||
end submodule psb_c_oacc_hll_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_c_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_hll_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_c_oacc_hll_cp_from_fmt
|
||||
end submodule psb_c_oacc_hll_cp_from_fmt_impl
|
@ -0,0 +1,86 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
complex(psb_spk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'c_oacc_hll_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i, j, nhacks, hksz
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_c_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_c_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
nhacks = size(a%hkoffs) - 1
|
||||
hksz = a%hksz
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i+1) - 1
|
||||
yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
|
||||
end do
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_c_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'hll_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_c_oacc_hll_inner_vect_sv
|
||||
end submodule psb_c_oacc_hll_inner_vect_sv_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
|
||||
class(psb_c_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'hll_mold'
|
||||
logical, parameter :: debug = .false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_c_oacc_hll_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_mold
|
||||
end submodule psb_c_oacc_hll_mold_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_hll_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_mv_from_coo
|
||||
end submodule psb_c_oacc_hll_mv_from_coo_impl
|
@ -0,0 +1,25 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_c_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_c_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_c_hll_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
end subroutine psb_c_oacc_hll_mv_from_fmt
|
||||
end submodule psb_c_oacc_hll_mv_from_fmt_impl
|
@ -0,0 +1,29 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='c_oacc_hll_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: hksz, nhacks
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_c_hll_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_reallocate_nz
|
||||
end submodule psb_c_oacc_hll_reallocate_nz_impl
|
@ -0,0 +1,62 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
complex(psb_spk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'scal'
|
||||
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
hksz = a%hksz
|
||||
nhacks = (a%get_nrows() + hksz - 1) / hksz
|
||||
nzt = a%nzt
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
! $ a parallel loop collapse(2) present(a, d)
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
|
||||
a%val(j) = a%val(j) * d(k)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
! $ a parallel loop collapse(2) present(a, d)
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
a%val(j) = a%val(j) * d(a%ja(j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
! $ a parallel loop collapse(2) present(a, d)
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_scal
|
||||
end submodule psb_c_oacc_hll_scal_impl
|
@ -0,0 +1,40 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
complex(psb_spk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'scal'
|
||||
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
hksz = a%hksz
|
||||
nhacks = (a%get_nrows() + hksz - 1) / hksz
|
||||
nzt = a%nzt
|
||||
|
||||
! $ a parallel loop collapse(2) present(a)
|
||||
!$acc parallel loop present(a)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
a%val(j) = a%val(j) * d
|
||||
end do
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_c_oacc_hll_scals
|
||||
end submodule psb_c_oacc_hll_scals_impl
|
@ -0,0 +1,90 @@
|
||||
submodule (psb_c_oacc_hll_mat_mod) psb_c_oacc_hll_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_c_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_oacc_hll_sparse_mat), intent(in) :: a
|
||||
class(psb_c_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n, nhacks, hksz
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
nhacks = size(a%hkoffs) - 1
|
||||
hksz = a%hksz
|
||||
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_c_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_c_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_c_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, nhacks, hksz
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
complex(psb_spk_) :: val(:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:), hkoffs(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
|
||||
complex(psb_spk_) :: tmp
|
||||
|
||||
info = 0
|
||||
!$acc parallel loop private(nlc, isz,ir,nr)
|
||||
do i = 1, nhacks
|
||||
isz = hkoffs(i + 1) - hkoffs(i)
|
||||
nlc = isz/hksz
|
||||
ir = (i-1)*hksz
|
||||
nr = min(hksz,m-ir)
|
||||
!$acc loop independent private(tmp,ii,ipnt)
|
||||
do ii = 1, nr
|
||||
ipnt = hkoffs(i) + ii
|
||||
tmp = czero
|
||||
!$acc loop seq
|
||||
do j = 1, nlc
|
||||
tmp = tmp + val(ipnt) * x(ja(ipnt))
|
||||
ipnt = ipnt + hksz
|
||||
end do
|
||||
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
end subroutine psb_c_oacc_hll_vect_mv
|
||||
end submodule psb_c_oacc_hll_vect_mv_impl
|
@ -0,0 +1,46 @@
|
||||
|
||||
subroutine psb_c_oacc_mlt_v(x, y, info)
|
||||
use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v
|
||||
|
||||
implicit none
|
||||
class(psb_c_base_vect_type), intent(inout) :: x
|
||||
class(psb_c_vect_oacc), intent(inout) :: y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i, n
|
||||
|
||||
info = 0
|
||||
n = min(x%get_nrows(), y%get_nrows())
|
||||
info = 0
|
||||
n = min(x%get_nrows(), y%get_nrows())
|
||||
select type(xx => x)
|
||||
class is (psb_c_vect_oacc)
|
||||
if (y%is_host()) call y%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
call c_inner_oacc_mlt_v(n,xx%v, y%v)
|
||||
!!$ !$acc parallel loop
|
||||
!!$ do i = 1, n
|
||||
!!$ y%v(i) = y%v(i) * xx%v(i)
|
||||
!!$ end do
|
||||
call y%set_dev()
|
||||
class default
|
||||
if (xx%is_dev()) call xx%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
do i = 1, n
|
||||
y%v(i) = y%v(i) * xx%v(i)
|
||||
end do
|
||||
call y%set_host()
|
||||
end select
|
||||
contains
|
||||
subroutine c_inner_oacc_mlt_v(n,x, y)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
complex(psb_spk_), intent(inout) :: x(:), y(:)
|
||||
|
||||
integer(psb_ipk_) :: i
|
||||
!$acc parallel loop present(x,y)
|
||||
do i = 1, n
|
||||
y(i) = (x(i)) * (y(i))
|
||||
end do
|
||||
end subroutine c_inner_oacc_mlt_v
|
||||
end subroutine psb_c_oacc_mlt_v
|
@ -0,0 +1,91 @@
|
||||
subroutine psb_c_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
|
||||
use psb_c_oacc_vect_mod, psb_protect_name => psb_c_oacc_mlt_v_2
|
||||
use psb_string_mod
|
||||
implicit none
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_c_base_vect_type), intent(inout) :: x
|
||||
class(psb_c_base_vect_type), intent(inout) :: y
|
||||
class(psb_c_vect_oacc), intent(inout) :: z
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character(len=1), intent(in), optional :: conjgx, conjgy
|
||||
integer(psb_ipk_) :: i, n
|
||||
logical :: conjgx_, conjgy_, device_done
|
||||
|
||||
conjgx_ = .false.
|
||||
conjgy_ = .false.
|
||||
device_done = .false.
|
||||
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
|
||||
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
|
||||
|
||||
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
|
||||
info = 0
|
||||
select type(xx => x)
|
||||
class is (psb_c_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_c_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
if ((beta /= czero) .and. (z%is_host())) call z%sync()
|
||||
call c_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
|
||||
call z%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
if ((beta /= czero) .and. (z%is_dev())) call z%sync()
|
||||
if (conjgx_.and.conjgy_) then
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * conjg(x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
else if (conjgx_.and.(.not.conjgy_)) then
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * conjg(x%v(i)) * (y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
else if ((.not.conjgx_).and.(conjgy_)) then
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * (x%v(i)) * conjg(y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
else
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
end if
|
||||
call z%set_host()
|
||||
end if
|
||||
|
||||
contains
|
||||
subroutine c_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
complex(psb_spk_), intent(in) :: alpha, beta
|
||||
complex(psb_spk_), intent(inout) :: x(:), y(:), z(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
logical, intent(in) :: conjgx, conjgy
|
||||
|
||||
integer(psb_ipk_) :: i
|
||||
if (conjgx.and.conjgy) then
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * conjg(x(i)) * conjg(y(i)) + beta * z(i)
|
||||
end do
|
||||
else if (conjgx.and.(.not.conjgy)) then
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * conjg(x(i)) * (y(i)) + beta * z(i)
|
||||
end do
|
||||
else if ((.not.conjgx).and.(conjgy)) then
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * (x(i)) * conjg(y(i)) + beta * z(i)
|
||||
end do
|
||||
else
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
|
||||
end do
|
||||
end if
|
||||
end subroutine c_inner_oacc_mlt_v_2
|
||||
end subroutine psb_c_oacc_mlt_v_2
|
||||
|
@ -0,0 +1,29 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_d_csr_sparse_mat%allocate(m, n, nz)
|
||||
call a%set_host()
|
||||
call a%sync_dev_space()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_allocate_mnnz
|
||||
end submodule psb_d_oacc_csr_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_csr_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_cp_from_coo
|
||||
end submodule psb_d_oacc_csr_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_d_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_csr_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_d_oacc_csr_cp_from_fmt
|
||||
end submodule psb_d_oacc_csr_cp_from_fmt_impl
|
@ -0,0 +1,83 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
real(psb_dpk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'd_oacc_csr_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_d_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_d_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_d_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, size(a%val)
|
||||
yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_d_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'csrg_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_d_oacc_csr_inner_vect_sv
|
||||
end submodule psb_d_oacc_csr_inner_vect_sv_impl
|
||||
|
@ -0,0 +1,35 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='csr_mold'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_d_oacc_csr_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_mold
|
||||
end submodule psb_d_oacc_csr_mold_impl
|
||||
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_csr_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_mv_from_coo
|
||||
end submodule psb_d_oacc_csr_mv_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_d_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_csr_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_d_oacc_csr_mv_from_fmt
|
||||
end submodule psb_d_oacc_csr_mv_from_fmt_impl
|
@ -0,0 +1,28 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='d_oacc_csr_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_d_csr_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_reallocate_nz
|
||||
end submodule psb_d_oacc_csr_reallocate_nz_impl
|
@ -0,0 +1,53 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
real(psb_dpk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, a%get_nrows()
|
||||
do j = a%irp(i), a%irp(i+1) - 1
|
||||
a%val(j) = a%val(j) * d(i)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, a%get_ncols()
|
||||
do j = a%irp(i), a%irp(i+1) - 1
|
||||
a%val(j) = a%val(j) * d(a%ja(j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, size(a%val)
|
||||
a%val(i) = a%val(i) * d(i)
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_scal
|
||||
end submodule psb_d_oacc_csr_scal_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
real(psb_dpk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
!$acc parallel loop present(a)
|
||||
do i = 1, size(a%val)
|
||||
a%val(i) = a%val(i) * d
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_csr_scals
|
||||
end submodule psb_d_oacc_csr_scals_impl
|
@ -0,0 +1,86 @@
|
||||
submodule (psb_d_oacc_csr_mat_mod) psb_d_oacc_csr_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_d_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, n
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
real(psb_dpk_) :: val(:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:), irp(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, ii, isz
|
||||
real(psb_dpk_) :: tmp
|
||||
integer(psb_ipk_), parameter :: vsz = 256
|
||||
|
||||
info = 0
|
||||
|
||||
!$acc parallel loop vector_length(vsz) private(isz)
|
||||
do ii = 1, m, vsz
|
||||
isz = min(vsz, m - ii + 1)
|
||||
!$acc loop independent private(tmp)
|
||||
do i = ii, ii + isz - 1
|
||||
tmp = 0.0_psb_dpk_
|
||||
!$acc loop seq
|
||||
do j = irp(i), irp(i + 1) - 1
|
||||
tmp = tmp + val(j) * x(ja(j))
|
||||
end do
|
||||
y(i) = alpha * tmp + beta * y(i)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
|
||||
end subroutine psb_d_oacc_csr_vect_mv
|
||||
end submodule psb_d_oacc_csr_vect_mv_impl
|
@ -0,0 +1,35 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnnz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(nz)) then
|
||||
nz_ = nz
|
||||
else
|
||||
nz_ = 10
|
||||
end if
|
||||
|
||||
call a%psb_d_ell_sparse_mat%allocate(m, n, nz_)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_allocate_mnnz
|
||||
end submodule psb_d_oacc_ell_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
|
||||
info = psb_success_
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_ell_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_cp_from_coo
|
||||
end submodule psb_d_oacc_ell_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_d_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_ell_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_d_oacc_ell_cp_from_fmt
|
||||
end submodule psb_d_oacc_ell_cp_from_fmt_impl
|
@ -0,0 +1,85 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
real(psb_dpk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'd_oacc_ell_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i, j, nzt
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_d_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_d_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_d_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
nzt = a%nzt
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, size(a%val, 1)
|
||||
do j = 1, nzt
|
||||
yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
|
||||
end do
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_d_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'ell_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_d_oacc_ell_inner_vect_sv
|
||||
end submodule psb_d_oacc_ell_inner_vect_sv_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'ell_mold'
|
||||
logical, parameter :: debug = .false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_d_oacc_ell_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_mold
|
||||
end submodule psb_d_oacc_ell_mold_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_ell_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_mv_from_coo
|
||||
end submodule psb_d_oacc_ell_mv_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_d_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_ell_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_d_oacc_ell_mv_from_fmt
|
||||
end submodule psb_d_oacc_ell_mv_from_fmt_impl
|
@ -0,0 +1,28 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='d_oacc_ell_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_d_ell_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_reallocate_nz
|
||||
end submodule psb_d_oacc_ell_reallocate_nz_impl
|
@ -0,0 +1,58 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
real(psb_dpk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j, m, nzt
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
m = a%get_nrows()
|
||||
nzt = a%nzt
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
!$acc parallel loop collapse(2) present(a, d)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d(i)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
!$acc parallel loop collapse(2) present(a, d)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d(a%ja(i, j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
!$acc parallel loop collapse(2) present(a, d)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d(j)
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_scal
|
||||
end submodule psb_d_oacc_ell_scal_impl
|
@ -0,0 +1,39 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
real(psb_dpk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j, nzt, m
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
m = a%get_nrows()
|
||||
nzt = a%nzt
|
||||
|
||||
!$acc parallel loop collapse(2) present(a)
|
||||
do i = 1, m
|
||||
do j = 1, nzt
|
||||
a%val(i, j) = a%val(i, j) * d
|
||||
end do
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_ell_scals
|
||||
end submodule psb_d_oacc_ell_scals_impl
|
@ -0,0 +1,90 @@
|
||||
submodule (psb_d_oacc_ell_mat_mod) psb_d_oacc_ell_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_ell_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_oacc_ell_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n, nzt, nc
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
nzt = a%nzt
|
||||
nc = size(a%ja,2)
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'oellmv Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, n, nc, alpha, a%val, a%ja, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_d_ell_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, n, nc, alpha, val, ja, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, n, nc
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
real(psb_dpk_) :: val(:,:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:,:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, ii, isz
|
||||
real(psb_dpk_) :: tmp
|
||||
integer(psb_ipk_), parameter :: vsz = 256
|
||||
|
||||
info = 0
|
||||
|
||||
!$acc parallel loop vector_length(vsz) private(isz)
|
||||
do ii = 1, m, vsz
|
||||
isz = min(vsz, m - ii + 1)
|
||||
!$acc loop independent private(tmp)
|
||||
do i = ii, ii + isz - 1
|
||||
tmp = 0.0_psb_dpk_
|
||||
!$acc loop seq
|
||||
do j = 1, nc
|
||||
if (ja(i,j) > 0) then
|
||||
tmp = tmp + val(i,j) * x(ja(i,j))
|
||||
end if
|
||||
end do
|
||||
y(i) = alpha * tmp + beta * y(i)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
|
||||
end subroutine psb_d_oacc_ell_vect_mv
|
||||
end submodule psb_d_oacc_ell_vect_mv_impl
|
@ -0,0 +1,36 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnnz'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: hksz, nhacks
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(nz)) then
|
||||
nz_ = nz
|
||||
else
|
||||
nz_ = 10
|
||||
end if
|
||||
|
||||
call a%psb_d_hll_sparse_mat%allocate(m, n, nz_)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_allocate_mnnz
|
||||
end submodule psb_d_oacc_hll_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_hll_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_cp_from_coo
|
||||
end submodule psb_d_oacc_hll_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_d_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_hll_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_d_oacc_hll_cp_from_fmt
|
||||
end submodule psb_d_oacc_hll_cp_from_fmt_impl
|
@ -0,0 +1,86 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
real(psb_dpk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'd_oacc_hll_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i, j, nhacks, hksz
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_d_hll_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_d_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_d_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
nhacks = size(a%hkoffs) - 1
|
||||
hksz = a%hksz
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i+1) - 1
|
||||
yy%v(a%irn(j)) = alpha * a%val(j) * xx%v(a%ja(j)) + beta * yy%v(a%irn(j))
|
||||
end do
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_d_hll_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'hll_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_d_oacc_hll_inner_vect_sv
|
||||
end submodule psb_d_oacc_hll_inner_vect_sv_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'hll_mold'
|
||||
logical, parameter :: debug = .false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_d_oacc_hll_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_mold
|
||||
end submodule psb_d_oacc_hll_mold_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_hll_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_mv_from_coo
|
||||
end submodule psb_d_oacc_hll_mv_from_coo_impl
|
@ -0,0 +1,25 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
class(psb_d_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_d_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_d_hll_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
end subroutine psb_d_oacc_hll_mv_from_fmt
|
||||
end submodule psb_d_oacc_hll_mv_from_fmt_impl
|
@ -0,0 +1,29 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='d_oacc_hll_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: hksz, nhacks
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_d_hll_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_reallocate_nz
|
||||
end submodule psb_d_oacc_hll_reallocate_nz_impl
|
@ -0,0 +1,62 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
real(psb_dpk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'scal'
|
||||
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
hksz = a%hksz
|
||||
nhacks = (a%get_nrows() + hksz - 1) / hksz
|
||||
nzt = a%nzt
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
! $ a parallel loop collapse(2) present(a, d)
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
k = (j - a%hkoffs(i)) / nzt + (i - 1) * hksz + 1
|
||||
a%val(j) = a%val(j) * d(k)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
! $ a parallel loop collapse(2) present(a, d)
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
a%val(j) = a%val(j) * d(a%ja(j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
! $ a parallel loop collapse(2) present(a, d)
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
a%val(j) = a%val(j) * d(j - a%hkoffs(i) + 1)
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_scal
|
||||
end submodule psb_d_oacc_hll_scal_impl
|
@ -0,0 +1,40 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(inout) :: a
|
||||
real(psb_dpk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 'scal'
|
||||
integer(psb_ipk_) :: i, j, k, hksz, nzt, nhacks
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
hksz = a%hksz
|
||||
nhacks = (a%get_nrows() + hksz - 1) / hksz
|
||||
nzt = a%nzt
|
||||
|
||||
! $ a parallel loop collapse(2) present(a)
|
||||
!$acc parallel loop present(a)
|
||||
do i = 1, nhacks
|
||||
do j = a%hkoffs(i), a%hkoffs(i + 1) - 1
|
||||
a%val(j) = a%val(j) * d
|
||||
end do
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_d_oacc_hll_scals
|
||||
end submodule psb_d_oacc_hll_scals_impl
|
@ -0,0 +1,90 @@
|
||||
submodule (psb_d_oacc_hll_mat_mod) psb_d_oacc_hll_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_d_oacc_hll_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_oacc_hll_sparse_mat), intent(in) :: a
|
||||
class(psb_d_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n, nhacks, hksz
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
nhacks = size(a%hkoffs) - 1
|
||||
hksz = a%hksz
|
||||
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, nhacks, hksz, alpha, a%val, a%ja, a%hkoffs, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_d_hll_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, nhacks, hksz, alpha, val, ja, hkoffs, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, nhacks, hksz
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
real(psb_dpk_) :: val(:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:), hkoffs(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, idx, k, ipnt,ir,nr,nlc,isz,ii
|
||||
real(psb_dpk_) :: tmp
|
||||
|
||||
info = 0
|
||||
!$acc parallel loop private(nlc, isz,ir,nr)
|
||||
do i = 1, nhacks
|
||||
isz = hkoffs(i + 1) - hkoffs(i)
|
||||
nlc = isz/hksz
|
||||
ir = (i-1)*hksz
|
||||
nr = min(hksz,m-ir)
|
||||
!$acc loop independent private(tmp,ii,ipnt)
|
||||
do ii = 1, nr
|
||||
ipnt = hkoffs(i) + ii
|
||||
tmp = dzero
|
||||
!$acc loop seq
|
||||
do j = 1, nlc
|
||||
tmp = tmp + val(ipnt) * x(ja(ipnt))
|
||||
ipnt = ipnt + hksz
|
||||
end do
|
||||
y(ii+ir) = alpha * tmp + beta * y(ii+ir)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
end subroutine psb_d_oacc_hll_vect_mv
|
||||
end submodule psb_d_oacc_hll_vect_mv_impl
|
@ -0,0 +1,46 @@
|
||||
|
||||
subroutine psb_d_oacc_mlt_v(x, y, info)
|
||||
use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v
|
||||
|
||||
implicit none
|
||||
class(psb_d_base_vect_type), intent(inout) :: x
|
||||
class(psb_d_vect_oacc), intent(inout) :: y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: i, n
|
||||
|
||||
info = 0
|
||||
n = min(x%get_nrows(), y%get_nrows())
|
||||
info = 0
|
||||
n = min(x%get_nrows(), y%get_nrows())
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (y%is_host()) call y%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
call d_inner_oacc_mlt_v(n,xx%v, y%v)
|
||||
!!$ !$acc parallel loop
|
||||
!!$ do i = 1, n
|
||||
!!$ y%v(i) = y%v(i) * xx%v(i)
|
||||
!!$ end do
|
||||
call y%set_dev()
|
||||
class default
|
||||
if (xx%is_dev()) call xx%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
do i = 1, n
|
||||
y%v(i) = y%v(i) * xx%v(i)
|
||||
end do
|
||||
call y%set_host()
|
||||
end select
|
||||
contains
|
||||
subroutine d_inner_oacc_mlt_v(n,x, y)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_), intent(inout) :: x(:), y(:)
|
||||
|
||||
integer(psb_ipk_) :: i
|
||||
!$acc parallel loop present(x,y)
|
||||
do i = 1, n
|
||||
y(i) = (x(i)) * (y(i))
|
||||
end do
|
||||
end subroutine d_inner_oacc_mlt_v
|
||||
end subroutine psb_d_oacc_mlt_v
|
@ -0,0 +1,91 @@
|
||||
subroutine psb_d_oacc_mlt_v_2(alpha, x, y, beta, z, info, conjgx, conjgy)
|
||||
use psb_d_oacc_vect_mod, psb_protect_name => psb_d_oacc_mlt_v_2
|
||||
use psb_string_mod
|
||||
implicit none
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
class(psb_d_base_vect_type), intent(inout) :: x
|
||||
class(psb_d_base_vect_type), intent(inout) :: y
|
||||
class(psb_d_vect_oacc), intent(inout) :: z
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character(len=1), intent(in), optional :: conjgx, conjgy
|
||||
integer(psb_ipk_) :: i, n
|
||||
logical :: conjgx_, conjgy_, device_done
|
||||
|
||||
conjgx_ = .false.
|
||||
conjgy_ = .false.
|
||||
device_done = .false.
|
||||
if (present(conjgx)) conjgx_ = (psb_toupper(conjgx) == 'C')
|
||||
if (present(conjgy)) conjgy_ = (psb_toupper(conjgy) == 'C')
|
||||
|
||||
n = min(x%get_nrows(), y%get_nrows(), z%get_nrows())
|
||||
info = 0
|
||||
select type(xx => x)
|
||||
class is (psb_d_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_d_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
if ((beta /= dzero) .and. (z%is_host())) call z%sync()
|
||||
call d_inner_oacc_mlt_v_2(n,alpha, xx%v, yy%v, beta, z%v, info, conjgx_, conjgy_)
|
||||
call z%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
if ((beta /= dzero) .and. (z%is_dev())) call z%sync()
|
||||
if (conjgx_.and.conjgy_) then
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
else if (conjgx_.and.(.not.conjgy_)) then
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
else if ((.not.conjgx_).and.(conjgy_)) then
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
else
|
||||
do i = 1, n
|
||||
z%v(i) = alpha * (x%v(i)) * (y%v(i)) + beta * z%v(i)
|
||||
end do
|
||||
end if
|
||||
call z%set_host()
|
||||
end if
|
||||
|
||||
contains
|
||||
subroutine d_inner_oacc_mlt_v_2(n,alpha, x, y, beta, z, info, conjgx, conjgy)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: n
|
||||
real(psb_dpk_), intent(in) :: alpha, beta
|
||||
real(psb_dpk_), intent(inout) :: x(:), y(:), z(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
logical, intent(in) :: conjgx, conjgy
|
||||
|
||||
integer(psb_ipk_) :: i
|
||||
if (conjgx.and.conjgy) then
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
|
||||
end do
|
||||
else if (conjgx.and.(.not.conjgy)) then
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
|
||||
end do
|
||||
else if ((.not.conjgx).and.(conjgy)) then
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
|
||||
end do
|
||||
else
|
||||
!$acc parallel loop present(x,y,z)
|
||||
do i = 1, n
|
||||
z(i) = alpha * (x(i)) * (y(i)) + beta * z(i)
|
||||
end do
|
||||
end if
|
||||
end subroutine d_inner_oacc_mlt_v_2
|
||||
end subroutine psb_d_oacc_mlt_v_2
|
||||
|
@ -0,0 +1,29 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_s_csr_sparse_mat%allocate(m, n, nz)
|
||||
call a%set_host()
|
||||
call a%sync_dev_space()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_allocate_mnnz
|
||||
end submodule psb_s_oacc_csr_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_s_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_s_csr_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_cp_from_coo
|
||||
end submodule psb_s_oacc_csr_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_s_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_s_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_s_csr_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_s_oacc_csr_cp_from_fmt
|
||||
end submodule psb_s_oacc_csr_cp_from_fmt_impl
|
@ -0,0 +1,83 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
|
||||
real(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_s_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
real(psb_spk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 's_oacc_csr_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_s_csr_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_s_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_s_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, size(a%val)
|
||||
yy%v(i) = alpha * a%val(i) * xx%v(a%ja(i)) + beta * yy%v(i)
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_s_csr_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'csrg_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_s_oacc_csr_inner_vect_sv
|
||||
end submodule psb_s_oacc_csr_inner_vect_sv_impl
|
||||
|
@ -0,0 +1,35 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mold_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_mold(a, b, info)
|
||||
implicit none
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_s_base_sparse_mat), intent(inout), allocatable :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='csr_mold'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
|
||||
info = 0
|
||||
if (allocated(b)) then
|
||||
call b%free()
|
||||
deallocate(b, stat=info)
|
||||
end if
|
||||
if (info == 0) allocate(psb_s_oacc_csr_sparse_mat :: b, stat=info)
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_alloc_dealloc_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
end if
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_mold
|
||||
end submodule psb_s_oacc_csr_mold_impl
|
||||
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_mv_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_s_coo_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
call a%free_dev_space()
|
||||
call a%psb_s_csr_sparse_mat%mv_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_mv_from_coo
|
||||
end submodule psb_s_oacc_csr_mv_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_mv_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_mv_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
class(psb_s_base_sparse_mat), intent(inout) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_s_coo_sparse_mat)
|
||||
call a%mv_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_s_csr_sparse_mat%mv_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_s_oacc_csr_mv_from_fmt
|
||||
end submodule psb_s_oacc_csr_mv_from_fmt_impl
|
@ -0,0 +1,28 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_reallocate_nz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_reallocate_nz(nz, a)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: nz
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='s_oacc_csr_reallocate_nz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
call a%psb_s_csr_sparse_mat%reallocate(nz)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_reallocate_nz
|
||||
end submodule psb_s_oacc_csr_reallocate_nz_impl
|
@ -0,0 +1,53 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scal_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_scal(d, a, info, side)
|
||||
implicit none
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
real(psb_spk_), intent(in) :: d(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, intent(in), optional :: side
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i, j
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
if (present(side)) then
|
||||
if (side == 'L') then
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, a%get_nrows()
|
||||
do j = a%irp(i), a%irp(i+1) - 1
|
||||
a%val(j) = a%val(j) * d(i)
|
||||
end do
|
||||
end do
|
||||
else if (side == 'R') then
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, a%get_ncols()
|
||||
do j = a%irp(i), a%irp(i+1) - 1
|
||||
a%val(j) = a%val(j) * d(a%ja(j))
|
||||
end do
|
||||
end do
|
||||
end if
|
||||
else
|
||||
!$acc parallel loop present(a, d)
|
||||
do i = 1, size(a%val)
|
||||
a%val(i) = a%val(i) * d(i)
|
||||
end do
|
||||
end if
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_scal
|
||||
end submodule psb_s_oacc_csr_scal_impl
|
@ -0,0 +1,34 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_scals_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_scals(d, a, info)
|
||||
implicit none
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(inout) :: a
|
||||
real(psb_spk_), intent(in) :: d
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name='scal'
|
||||
logical, parameter :: debug=.false.
|
||||
integer(psb_ipk_) :: i
|
||||
|
||||
info = psb_success_
|
||||
call psb_erractionsave(err_act)
|
||||
|
||||
if (a%is_host()) call a%sync()
|
||||
|
||||
!$acc parallel loop present(a)
|
||||
do i = 1, size(a%val)
|
||||
a%val(i) = a%val(i) * d
|
||||
end do
|
||||
|
||||
call a%set_dev()
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_csr_scals
|
||||
end submodule psb_s_oacc_csr_scals_impl
|
@ -0,0 +1,86 @@
|
||||
submodule (psb_s_oacc_csr_mat_mod) psb_s_oacc_csr_vect_mv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_csr_vect_mv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
|
||||
real(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_s_oacc_csr_sparse_mat), intent(in) :: a
|
||||
class(psb_s_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
integer(psb_ipk_) :: m, n
|
||||
character :: trans_
|
||||
logical :: device_done, tra
|
||||
|
||||
info = psb_success_
|
||||
m = a%get_nrows()
|
||||
n = a%get_ncols()
|
||||
|
||||
if ((n > size(x%v)) .or. (m > size(y%v))) then
|
||||
write(0,*) 'ocsrmv Size error ', m, n, size(x%v), size(y%v)
|
||||
info = psb_err_invalid_mat_state_
|
||||
return
|
||||
end if
|
||||
device_done = .false.
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (.not.tra) then
|
||||
select type(xx => x)
|
||||
class is (psb_s_vect_oacc)
|
||||
select type (yy => y)
|
||||
class is (psb_s_vect_oacc)
|
||||
if (a%is_host()) call a%sync()
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (yy%is_host()) call yy%sync()
|
||||
call inner_spmv(m, n, alpha, a%val, a%ja, a%irp, x%v, beta, y%v, info)
|
||||
call y%set_dev()
|
||||
device_done = .true.
|
||||
end select
|
||||
end select
|
||||
end if
|
||||
|
||||
if (.not.device_done) then
|
||||
if (x%is_dev()) call x%sync()
|
||||
if (y%is_dev()) call y%sync()
|
||||
call a%psb_s_csr_sparse_mat%spmm(alpha, x%v, beta, y%v, info, trans)
|
||||
call y%set_host()
|
||||
end if
|
||||
contains
|
||||
|
||||
subroutine inner_spmv(m, n, alpha, val, ja, irp, x, beta, y, info)
|
||||
implicit none
|
||||
integer(psb_ipk_) :: m, n
|
||||
real(psb_spk_), intent(in) :: alpha, beta
|
||||
real(psb_spk_) :: val(:), x(:), y(:)
|
||||
integer(psb_ipk_) :: ja(:), irp(:)
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
integer(psb_ipk_) :: i, j, ii, isz
|
||||
real(psb_spk_) :: tmp
|
||||
integer(psb_ipk_), parameter :: vsz = 256
|
||||
|
||||
info = 0
|
||||
|
||||
!$acc parallel loop vector_length(vsz) private(isz)
|
||||
do ii = 1, m, vsz
|
||||
isz = min(vsz, m - ii + 1)
|
||||
!$acc loop independent private(tmp)
|
||||
do i = ii, ii + isz - 1
|
||||
tmp = 0.0_psb_dpk_
|
||||
!$acc loop seq
|
||||
do j = irp(i), irp(i + 1) - 1
|
||||
tmp = tmp + val(j) * x(ja(j))
|
||||
end do
|
||||
y(i) = alpha * tmp + beta * y(i)
|
||||
end do
|
||||
end do
|
||||
end subroutine inner_spmv
|
||||
|
||||
end subroutine psb_s_oacc_csr_vect_mv
|
||||
end submodule psb_s_oacc_csr_vect_mv_impl
|
@ -0,0 +1,35 @@
|
||||
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_allocate_mnnz_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_ell_allocate_mnnz(m, n, a, nz)
|
||||
implicit none
|
||||
integer(psb_ipk_), intent(in) :: m, n
|
||||
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
integer(psb_ipk_), intent(in), optional :: nz
|
||||
integer(psb_ipk_) :: info
|
||||
integer(psb_ipk_) :: err_act, nz_
|
||||
character(len=20) :: name='allocate_mnnz'
|
||||
logical, parameter :: debug=.false.
|
||||
|
||||
call psb_erractionsave(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(nz)) then
|
||||
nz_ = nz
|
||||
else
|
||||
nz_ = 10
|
||||
end if
|
||||
|
||||
call a%psb_s_ell_sparse_mat%allocate(m, n, nz_)
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
if (info /= 0) goto 9999
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_ell_allocate_mnnz
|
||||
end submodule psb_s_oacc_ell_allocate_mnnz_impl
|
@ -0,0 +1,27 @@
|
||||
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_coo_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_ell_cp_from_coo(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_s_coo_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
|
||||
info = psb_success_
|
||||
call a%free_dev_space()
|
||||
call a%psb_s_ell_sparse_mat%cp_from_coo(b, info)
|
||||
if (info /= 0) goto 9999
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
|
||||
return
|
||||
|
||||
9999 continue
|
||||
info = psb_err_alloc_dealloc_
|
||||
return
|
||||
|
||||
end subroutine psb_s_oacc_ell_cp_from_coo
|
||||
end submodule psb_s_oacc_ell_cp_from_coo_impl
|
@ -0,0 +1,26 @@
|
||||
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_cp_from_fmt_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_ell_cp_from_fmt(a, b, info)
|
||||
implicit none
|
||||
|
||||
class(psb_s_oacc_ell_sparse_mat), intent(inout) :: a
|
||||
class(psb_s_base_sparse_mat), intent(in) :: b
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
|
||||
info = psb_success_
|
||||
|
||||
select type(b)
|
||||
type is (psb_s_coo_sparse_mat)
|
||||
call a%cp_from_coo(b, info)
|
||||
class default
|
||||
call a%free_dev_space()
|
||||
call a%psb_s_ell_sparse_mat%cp_from_fmt(b, info)
|
||||
if (info /= 0) return
|
||||
call a%sync_dev_space()
|
||||
call a%set_host()
|
||||
call a%sync()
|
||||
end select
|
||||
|
||||
end subroutine psb_s_oacc_ell_cp_from_fmt
|
||||
end submodule psb_s_oacc_ell_cp_from_fmt_impl
|
@ -0,0 +1,85 @@
|
||||
submodule (psb_s_oacc_ell_mat_mod) psb_s_oacc_ell_inner_vect_sv_impl
|
||||
use psb_base_mod
|
||||
contains
|
||||
module subroutine psb_s_oacc_ell_inner_vect_sv(alpha, a, x, beta, y, info, trans)
|
||||
implicit none
|
||||
class(psb_s_oacc_ell_sparse_mat), intent(in) :: a
|
||||
real(psb_spk_), intent(in) :: alpha, beta
|
||||
class(psb_s_base_vect_type), intent(inout) :: x, y
|
||||
integer(psb_ipk_), intent(out) :: info
|
||||
character, optional, intent(in) :: trans
|
||||
|
||||
real(psb_spk_), allocatable :: rx(:), ry(:)
|
||||
logical :: tra
|
||||
character :: trans_
|
||||
integer(psb_ipk_) :: err_act
|
||||
character(len=20) :: name = 's_oacc_ell_inner_vect_sv'
|
||||
logical, parameter :: debug = .false.
|
||||
integer(psb_ipk_) :: i, j, nzt
|
||||
|
||||
call psb_get_erraction(err_act)
|
||||
info = psb_success_
|
||||
|
||||
if (present(trans)) then
|
||||
trans_ = trans
|
||||
else
|
||||
trans_ = 'N'
|
||||
end if
|
||||
|
||||
if (.not.a%is_asb()) then
|
||||
info = psb_err_invalid_mat_state_
|
||||
call psb_errpush(info, name)
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
tra = (psb_toupper(trans_) == 'T') .or. (psb_toupper(trans_) == 'C')
|
||||
|
||||
if (tra .or. (beta /= dzero)) then
|
||||
call x%sync()
|
||||
call y%sync()
|
||||
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, x, beta, y, info, trans)
|
||||
call y%set_host()
|
||||
else
|
||||
select type (xx => x)
|
||||
type is (psb_s_vect_oacc)
|
||||
select type(yy => y)
|
||||
type is (psb_s_vect_oacc)
|
||||
if (xx%is_host()) call xx%sync()
|
||||
if (beta /= dzero) then
|
||||
if (yy%is_host()) call yy%sync()
|
||||
end if
|
||||
nzt = a%nzt
|
||||
!$acc parallel loop present(a, xx, yy)
|
||||
do i = 1, size(a%val, 1)
|
||||
do j = 1, nzt
|
||||
yy%v(i) = alpha * a%val(i, j) * xx%v(a%ja(i, j)) + beta * yy%v(i)
|
||||
end do
|
||||
end do
|
||||
call yy%set_dev()
|
||||
class default
|
||||
rx = xx%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
class default
|
||||
rx = x%get_vect()
|
||||
ry = y%get_vect()
|
||||
call a%psb_s_ell_sparse_mat%inner_spsm(alpha, rx, beta, ry, info)
|
||||
call y%bld(ry)
|
||||
end select
|
||||
endif
|
||||
|
||||
if (info /= psb_success_) then
|
||||
info = psb_err_from_subroutine_
|
||||
call psb_errpush(info, name, a_err = 'ell_vect_sv')
|
||||
goto 9999
|
||||
endif
|
||||
|
||||
call psb_erractionrestore(err_act)
|
||||
return
|
||||
|
||||
9999 call psb_error_handler(err_act)
|
||||
return
|
||||
end subroutine psb_s_oacc_ell_inner_vect_sv
|
||||
end submodule psb_s_oacc_ell_inner_vect_sv_impl
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue