Merge branch 'cmake2' into cmake

cmake
Luca Pepè Sciarria 2 months ago
commit ef71a32484

@ -0,0 +1,12 @@
$Format:%d%n%n$
# Fall back version, probably last release:
3.9.0
# PSBLAS version file.
#
# Release archive created from commit:
# $Format:%H %d$
# $Format:Created on %ci by %cN, and$
# $Format:signed by %GS using %GK.$
# $Format:Signature status: %G?$
$Format:%GG$

1
.gitignore vendored

@ -1,6 +1,7 @@
*.a
*.o
*.mod
*.smod
*~
# header files generated

@ -430,7 +430,11 @@ endif()
#---------------------------------------
# Link order, left to right:
# cbind.a, util.a linsolve.a prec.a base.a
include(${CMAKE_CURRENT_LIST_DIR}/base/CMakeLists.txt)
@ -515,6 +519,7 @@ target_include_directories(prec PUBLIC
$<INSTALL_INTERFACE:modules>)
target_link_libraries(prec PUBLIC base)
# linsolve: the linear-solver library (named "krylov" on the other side of the
# merge). Its source list comes from linsolve/CMakeLists.txt; it is built after
# prec and links against base and prec.
include(${CMAKE_CURRENT_LIST_DIR}/linsolve/CMakeLists.txt)
add_library(linsolve ${linsolve_source_files})
set_target_properties(linsolve
  PROPERTIES
  Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/modules"
  POSITION_INDEPENDENT_CODE TRUE
  OUTPUT_NAME psb_linsolve
  LINKER_LANGUAGE Fortran
)
target_include_directories(linsolve PUBLIC
  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
  $<INSTALL_INTERFACE:modules>)
target_link_libraries(linsolve PUBLIC base prec)
include(${CMAKE_CURRENT_LIST_DIR}/ext/CMakeLists.txt)
@ -644,7 +664,11 @@ target_include_directories(cbind PUBLIC
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules> # Path for building
$<INSTALL_INTERFACE:modules> # Path for installation
)
# The C bindings link against every Fortran library target, including linsolve.
target_link_libraries(cbind PUBLIC base prec linsolve ext util)
# Custom command to copy all header files
@ -706,13 +730,21 @@ if(MPI_FOUND)
endif()
# Link every PSBLAS library target against the MPI C and Fortran libraries.
foreach(lib base prec linsolve ext util cbind)
  target_link_libraries(${lib} PUBLIC ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES})
endforeach()
endif()
# When OpenCoarrays was found, link each library target against its static
# MPI-based coarray runtime.
if(OpenCoarrays_FOUND)
  foreach(lib base prec linsolve ext util cbind) #TODO: check if cbind goes here!
    target_link_libraries(${lib} PUBLIC OpenCoarrays::caf_mpi_static)
  endforeach()
endif()
@ -731,7 +763,11 @@ install(DIRECTORY "${CMAKE_BINARY_DIR}/include" DESTINATION "include"
install(DIRECTORY "${CMAKE_BINARY_DIR}/modules" DESTINATION "modules"
FILES_MATCHING PATTERN "*.mod")
install(TARGETS base prec linsolve ext util cbind
EXPORT ${CMAKE_PROJECT_NAME}-targets
DESTINATION "${CMAKE_INSTALL_LIBDIR}"
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"

@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a
OACCD=@OACCD@
OACCLD=@OACCLD@
FCOPENACC=@FCOPENACC@
CCOPENACC=@CCOPENACC@
CXXOPENACC=@CXXOPENACC@
CUDAD=@CUDAD@
CUDALD=@CUDALD@
LCUDA=@LCUDA@
@ -74,14 +80,18 @@ LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
CUDA_DEFINES=@CUDA_DEFINES@
FCUDEFINES=@FCUDEFINES@
CCUDEFINES=@CCUDEFINES@
CXXCUDEFINES=@CXXCUDEFINES@
NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu
.cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<

@ -1,6 +1,6 @@
include Make.inc
all: dirs based precd kryld utild cbindd extd $(CUDAD) libd
all: dirs based precd linslvd utild cbindd extd $(CUDAD) $(OACCD) libd
@echo "====================================="
@echo "PSBLAS libraries Compilation Successful."
@ -11,28 +11,31 @@ dirs:
precd: based
utild: based
kryld: precd
linslvd: precd
extd: based
cudad: extd
cbindd: based precd kryld utild
oaccd: extd
cbindd: based precd linslvd utild
libd: based precd kryld utild cbindd extd $(CUDALD)
libd: based precd linslvd utild cbindd extd $(CUDALD) $(OACCLD)
$(MAKE) -C base lib
$(MAKE) -C prec lib
$(MAKE) -C krylov lib
$(MAKE) -C linsolve lib
$(MAKE) -C util lib
$(MAKE) -C cbind lib
$(MAKE) -C ext lib
cudald: cudad
$(MAKE) -C cuda lib
oaccld: oaccd
$(MAKE) -C openacc lib
based:
$(MAKE) -C base objs
precd:
$(MAKE) -C prec objs
kryld:
$(MAKE) -C krylov objs
linslvd:
$(MAKE) -C linsolve objs
utild:
$(MAKE) -C util objs
cbindd:
@ -41,6 +44,8 @@ extd: based
$(MAKE) -C ext objs
cudad: based extd
$(MAKE) -C cuda objs
oaccd: based extd
$(MAKE) -C openacc objs
install: all
@ -62,11 +67,12 @@ install: all
clean:
$(MAKE) -C base clean
$(MAKE) -C prec clean
$(MAKE) -C krylov clean
$(MAKE) -C linsolve clean
$(MAKE) -C util clean
$(MAKE) -C cbind clean
$(MAKE) -C ext clean
$(MAKE) -C cuda clean
$(MAKE) -C openacc clean
check: all
make check -C test/serial
@ -79,11 +85,12 @@ cleanlib:
veryclean: cleanlib
cd base && $(MAKE) veryclean
cd prec && $(MAKE) veryclean
cd krylov && $(MAKE) veryclean
cd linsolve && $(MAKE) veryclean
cd util && $(MAKE) veryclean
cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean
cd openacc && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean

@ -1,37 +1,42 @@
PSBLAS library, version 3.9
===========================
# PSBLAS library, version 3.9
The architecture of the Fortran 2003 sparse BLAS is described in:
The PSBLAS library, developed with the aim to facilitate the parallelization of computationally intensive scientific applications, is designed to address parallel implementation of iterative solvers for sparse linear systems through the distributed memory paradigm. It includes routines for multiplying sparse matrices by dense matrices, solving block diagonal systems with triangular diagonal entries, preprocessing sparse matrices, and contains additional routines for dense matrix operations. The current implementation of PSBLAS addresses a distributed memory execution model operating with message passing.
The PSBLAS library version 3 is implemented in the Fortran 2008 programming language, with reuse and/or adaptation of existing Fortran 77 and Fortran 95 software, plus a handful of C routines.
## References
The architecture, philosophy and implementation details of the library are contained in the following papers:
- The architecture of the Fortran 2003 sparse BLAS is described in:
>S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix
>Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No.
4, 2012.
The ideas are explored further with the paper:
- The ideas are explored further with the paper:
>V. Cardellini, S. Filippone and D. Rouson. Design Patterns for
>sparse-matrix computations on hybrid CPU/GPU platforms, Scientific
>Programming, 22(2014), pp.1-19.
Version 1.0 of the library is described in:
- Version 1.0 of the library is described in:
>S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear
>algebra computation on sparse matrices, ACM Trans. on Math. Software,
>26(4), Dec. 2000, pp. 527-550.
- The software infrastructure changes required to accommodate the implementation of the
Additive-Schwarz preconditioners available in [AMG4PSBLAS](https://github.com/sfilippone/amg4psblas/) are detailed in:
> A. Buttari, P. D'Ambra, D. di Serafino, S. Filippone, Extending PSBLAS to build parallel Schwarz preconditioners, Applied Parallel Computing. State of the Art in Scientific Computing: 7th International Workshop, PARA 2004, LNCS 3732, 2006, pp. 593-602.
> A. Buttari, P. D'Ambra, D. Di Serafino, S. Filippone, 2LEV-D2P4: A package of high-performance preconditioners for scientific and engineering applications, Applicable Algebra in Engineering, Communications and Computing, 2007, 18(3), pp. 223-239.
UTILITIES
---------
The `test/util` directory contains some utilities to convert to/from
Harwell-Boeing and MatrixMarket file formats.
> P. D'Ambra, D. Di Serafino, S. Filippone, MLD2P4: A package of parallel algebraic multilevel domain decomposition preconditioners in Fortran 95 ACM Transactions on Mathematical Software, 2010, 37(3), 30
PSBLAS is the backbone of the Parallel Sparse Computation Toolkit ([PSCToolkit](https://psctoolkit.github.io/)) suite of libraries. See the paper:
> D'Ambra, P., Durastante, F., & Filippone, S. (2023). Parallel Sparse Computation Toolkit. Software Impacts, 15, 100463.
DOCUMENTATION
-------------
See docs/psblas-3.9.pdf; an HTML version of the same document is
available in docs/html. Please consult the sample programs, especially
test/pargen/psb_[sd]_pde[23]d.f90
### Other Software credits
OTHER SOFTWARE CREDITS
----------------------
We originally included a modified implementation of some of the Sparker
(serial sparse BLAS) material; this has been completely rewritten, way
beyond the intention(s) and responsibilities of the original developers.
@ -40,18 +45,8 @@ The main reference for the serial sparse BLAS is:
>linear algebra subprograms for sparse matrices: a user level interface,
>ACM Trans. Math. Softw., 23(3), 379-401, 1997.
CUDA and GPU support
--------------------
This version of PSBLAS incorporates into a single package three
entities that were previouslty separated:
1. PSBLAS -- the base library
2. PSBLAS-EXT -- a library providing additional storage formats
3. SPGPU -- a package of kernels for NVIDIA GPUs originally
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
INSTALLING
----------
## Installing
To compile and run our software you will need the following
prerequisites (see also SERIAL below):
@ -71,7 +66,8 @@ prerequisites (see also SERIAL below):
We use the C interface to AMD.
5. If you have CUDA available, use
--with-cuda=<path> to specify the CUDA toolkit location
--enable-cuda to compile CUDA-enabled methods
--with-cudadir=<path> to specify the CUDA toolkit location
--with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute
capabilities) to compile the CUDA code for.
@ -79,21 +75,27 @@ The configure script will generate a Make.inc file suitable for building
the library. The script is capable of recognizing the needed libraries
with their default names; if they are in unusual places consider adding
the paths with `--with-libs`, or explicitly specifying the names in
`--with-blas`, etc. Please note that a common way for the configure script
to fail is to specify inconsistent MPI vs. plain compilers, either
directly or indirectly via environment variables; e.g. specifying the
Intel compiler with `FC=ifort` while at the same time having an
`MPIFC=mpif90` which points to GNU Fortran. The best way to avoid this
situation is (in our opinion) to use the environment modules package
(see http://modules.sourceforge.net/), and load the relevant
variables with (e.g.)
```
module load gnu46 openmpi
```
This will delegate to the modules setup to make sure that the version of
openmpi in use is the one compiled with the gnu46 compilers. After the
configure script has completed you can always tweak the Make.inc file
yourself.
`--with-blas`, etc.
>[!CAUTION]
> Please note that a common way for the configure script
> to fail is to specify inconsistent MPI vs. plain compilers, either
> directly or indirectly via environment variables; e.g. specifying the
> Intel compiler with `FC=ifort` while at the same time having an
> `MPIFC=mpif90` which points to GNU Fortran.
>[!TIP]
> The best way to avoid this
> situation is (in our opinion) to use the environment modules package
> (see [http://modules.sourceforge.net/](http://modules.sourceforge.net/)), and load the relevant
> variables with (e.g.)
> ```
> module load gcc/13.2.0 openmpi/4.1.6
> ```
> This will delegate to the modules setup to make sure that the version of
> openmpi in use is the one compiled with the gcc/13.2.0 compilers. After the
> configure script has completed you can always tweak the Make.inc file
> yourself.
After you have Make.inc fixed, run
```
@ -105,58 +107,117 @@ install and the libraries will be installed under `/path/lib`, while the
module files will be installed under `/path/modules`. The regular and
experimental C interface header files are under `/path/include`.
SERIAL
------
### CUDA and GPU support
This version of PSBLAS incorporates into a single package three
entities that were previously separated:
| Library | |
|---------|--------------------|
| PSBLAS | the base library |
| PSBLAS-EXT | a library providing additional storage formats for matrices and vectors |
| SPGPU | a package of kernels for NVIDIA GPUs originally written by Davide Barbieri and Salvatore Filippone; see the license file [cuda/License-spgpu.md](cuda/License-spgpu.md) |
### OpenACC
There is a highly experimental version of an OpenACC interface,
you can access it by specifying
```bash
--enable-openacc --with-extraopenacc="-foffload=nvptx-none=-march=sm_70"
```
where the argument to the extraopenacc option depends on the compiler
you are using (the example shown here is relevant for the GNU
compiler).
### Serial
Configuring with `--enable-serial` will provide a fake MPI stub library
that enables running in pure serial mode; no MPI installation is needed
in this case (but note that the fake MPI stubs are only guaranteed to
cover what we use internally, it's not a complete replacement).
INTEGER SIZES
-------------
### Integers
We have two kinds of integers: IPK for local indices, and LPK for
global indices. They can be specified independently at configure time,
e.g.
```bash
--with-ipk=4 --with-lpk=8
```
which is asking for 4-bytes local indices, and 8-bytes global indices
(this is the default).
## Documentation
TODO
----
Fix all reamining bugs. Bugs? We dont' have any ! ;-)
Further information on installation and configuration can be found in the documentation.
See [docs/psblas-3.9.pdf](docs/psblas-3.9.pdf); an HTML version of the same document is
available in docs/html. Please consult the sample programs, especially
- [test/pargen/psb_s_pde2d.F90](test/pargen/psb_s_pde2d.F90) [test/pargen/psb_d_pde2d.F90](test/pargen/psb_d_pde2d.F90)
- [test/pargen/psb_s_pde3d.F90](test/pargen/psb_s_pde3d.F90) [test/pargen/psb_d_pde3d.F90](test/pargen/psb_d_pde3d.F90)
which contain examples for the solution of linear systems obtained by the discretization of a generic second-order differential equation in two:
```math
- a_1 \frac{\partial^2 u}{\partial x^2}
- a_2 \frac{\partial^2 u}{\partial y^2}
+ b_1 \frac{\partial u}{\partial x}
+ b_2 \frac{\partial u}{\partial y}
+ c u = f
```
or three
```math
- a_1 \frac{\partial^2 u}{\partial x^2}
- a_2 \frac{\partial^2 u}{\partial y^2}
- a_3 \frac{\partial^2 u}{\partial z^2}
+ b_1 \frac{\partial u}{\partial x}
+ b_2 \frac{\partial u}{\partial y}
+ b_3 \frac{\partial u}{\partial z}
+ c u = f
```
dimensions on the unit square/cube with Dirichlet boundary conditions.
### Utilities
The PSBLAS team.
---------------
Project lead:
Salvatore Filippone
The [test/util](test/util) directory contains some utilities to convert to/from
Harwell-Boeing and MatrixMarket file formats.
Contributors (roughly reverse cronological order):
## TODO and bugs
Dimitri Walther
Andea Di Iorio
Stefano Petrilli
Soren Rasmussen
Zaak Beekman
Ambra Abdullahi Hassan
Pasqua D'Ambra
Alfredo Buttari
Daniela di Serafino
Michele Martone
Michele Colajanni
Fabio Cerioni
Stefano Maiolatesi
Dario Pascucci
- [ ] Improving OpenACC support
- [ ] Improving OpenMP support
- [X] Fix all remaining bugs. Bugs? We don't have any! 🤓
> [!NOTE]
> To report bugs 🐛 or issues ❓ please use the [GitHub issue system](https://github.com/sfilippone/psblas3/issues).
RELATED SOFTWARE
----------------
## The PSBLAS team.
**Project lead:**
Salvatore Filippone
**Contributors** (_roughly reverse chronological order_):
- Theophane Loloum
- Fabio Durastante
- Dimitri Walther
- Andea Di Iorio
- Stefano Petrilli
- Soren Rasmussen
- Zaak Beekman
- Ambra Abdullahi Hassan
- Pasqua D'Ambra
- Alfredo Buttari
- Daniela di Serafino
- Michele Martone
- Michele Colajanni
- Fabio Cerioni
- Stefano Maiolatesi
- Dario Pascucci
## RELATED SOFTWARE
If you are looking for more sophisticated preconditioners, you may be
interested in the package AMG4PSBLAS from
<http://github.com/sfilippone/amg4psblas>
<http://github.com/sfilippone/amg4psblas> and the whole [PSCToolkit suite](https://psctoolkit.github.io/).
Contact: <https://github.com/sfilippone/psblas3>

@ -10,13 +10,14 @@ BASIC_MODS= psb_const_mod.o psb_cbind_const_mod.o psb_error_mod.o psb_realloc_mo
auxil/psb_z_realloc_mod.o
COMMINT= penv/psi_penv_mod.o \
penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o \
penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_i2_p2p_mod.o \
penv/psi_e_p2p_mod.o \
penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o \
penv/psi_c_p2p_mod.o \
penv/psi_z_p2p_mod.o \
penv/psi_collective_mod.o \
penv/psi_i2_collective_mod.o \
penv/psi_e_collective_mod.o \
penv/psi_m_collective_mod.o \
penv/psi_s_collective_mod.o \
@ -149,23 +150,25 @@ psb_realloc_mod.o: auxil/psb_m_realloc_mod.o \
auxil/psb_c_realloc_mod.o \
auxil/psb_z_realloc_mod.o
penv/psi_p2p_mod.o: penv/psi_m_p2p_mod.o \
penv/psi_p2p_mod.o: penv/psi_i2_p2p_mod.o \
penv/psi_m_p2p_mod.o \
penv/psi_e_p2p_mod.o \
penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o \
penv/psi_c_p2p_mod.o \
penv/psi_z_p2p_mod.o
penv/psi_collective_mod.o: penv/psi_e_collective_mod.o \
penv/psi_collective_mod.o: penv/psi_i2_collective_mod.o \
penv/psi_e_collective_mod.o \
penv/psi_m_collective_mod.o \
penv/psi_s_collective_mod.o \
penv/psi_d_collective_mod.o \
penv/psi_c_collective_mod.o \
penv/psi_z_collective_mod.o
penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \
penv/psi_i2_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o: penv/psi_penv_mod.o
penv/psi_e_collective_mod.o penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
penv/psi_i2_collective_mod.o penv/psi_e_collective_mod.o penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
penv/psi_d_collective_mod.o penv/psi_c_collective_mod.o penv/psi_z_collective_mod.o: penv/psi_penv_mod.o \
penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o

@ -35,7 +35,7 @@
! Auxiliary module for descriptor: constant values.
!
module psb_desc_const_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_
!
! Communication, prolongation & restriction
!

@ -167,6 +167,7 @@ module psi_penv_mod
#else
integer(psb_mpk_), save :: mpi_i2amx_op, mpi_i2amn_op
integer(psb_mpk_), save :: mpi_iamx_op, mpi_iamn_op
integer(psb_mpk_), save :: mpi_mamx_op, mpi_mamn_op
integer(psb_mpk_), save :: mpi_eamx_op, mpi_eamn_op
@ -181,6 +182,7 @@ module psi_penv_mod
#endif
private :: psi_get_sizes, psi_register_mpi_extras
private :: psi_i2amx_op, psi_i2amn_op
private :: psi_iamx_op, psi_iamn_op
private :: psi_mamx_op, psi_mamn_op
private :: psi_eamx_op, psi_eamn_op
@ -773,6 +775,8 @@ contains
#if defined(SERIAL_MPI)
#else
if (info == 0) call mpi_op_create(psi_i2amx_op,.true.,mpi_i2amx_op,info)
if (info == 0) call mpi_op_create(psi_i2amn_op,.true.,mpi_i2amn_op,info)
if (info == 0) call mpi_op_create(psi_mamx_op,.true.,mpi_mamx_op,info)
if (info == 0) call mpi_op_create(psi_mamn_op,.true.,mpi_mamn_op,info)
if (info == 0) call mpi_op_create(psi_eamx_op,.true.,mpi_eamx_op,info)
@ -997,6 +1001,8 @@ contains
& call mpi_comm_Free(ctxt%ctxt,info)
end if
if (close_) then
if (info == 0) call mpi_op_free(mpi_i2amx_op,info)
if (info == 0) call mpi_op_free(mpi_i2amn_op,info)
if (info == 0) call mpi_op_free(mpi_mamx_op,info)
if (info == 0) call mpi_op_free(mpi_mamn_op,info)
if (info == 0) call mpi_op_free(mpi_eamx_op,info)
@ -1188,6 +1194,26 @@ contains
! Note: len & type are always default integer.
!
! !!!!!!!!!!!!!!!!!!!!!!
subroutine psi_i2amx_op(inv, outv,len,type)
  ! Element-wise "largest magnitude" combiner for psb_i2pk_ integers.
  ! This routine is handed to mpi_op_create to build mpi_i2amx_op.
  !   inv  : incoming values (read only here)
  !   outv : accumulated values; outv(k) is replaced by inv(k) whenever
  !          inv(k) has the strictly larger absolute value
  !   len  : number of entries in inv and outv
  !   type : accepted for the callback interface, not referenced here
  integer(psb_mpk_) :: len, type
  integer(psb_i2pk_) :: inv(len), outv(len)
  integer(psb_mpk_) :: k

  do k = 1, len
    if (abs(outv(k)) < abs(inv(k))) then
      outv(k) = inv(k)
    end if
  end do
end subroutine psi_i2amx_op
subroutine psi_i2amn_op(inv, outv,len,type)
  ! Element-wise "smallest magnitude" combiner for psb_i2pk_ integers.
  ! This routine is handed to mpi_op_create to build mpi_i2amn_op.
  !   inv  : incoming values (read only here)
  !   outv : accumulated values; outv(k) is replaced by inv(k) whenever
  !          inv(k) has the strictly smaller absolute value
  !   len  : number of entries in inv and outv
  !   type : accepted for the callback interface, not referenced here
  integer(psb_mpk_) :: len, type
  integer(psb_i2pk_) :: inv(len), outv(len)
  integer(psb_mpk_) :: k

  do k = 1, len
    if (abs(outv(k)) > abs(inv(k))) then
      outv(k) = inv(k)
    end if
  end do
end subroutine psi_i2amn_op
subroutine psi_mamx_op(inv, outv,len,type)
integer(psb_mpk_) :: inv(len), outv(len)
integer(psb_mpk_) :: len,type

@ -600,7 +600,8 @@ subroutine psb_c_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= czero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine psb_lc_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= czero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_c_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= czero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@ -4322,7 +4323,8 @@ subroutine psb_lc_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= czero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_c_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= czero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@ -6561,7 +6562,8 @@ subroutine psb_lc_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= czero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1

@ -600,7 +600,8 @@ subroutine psb_d_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= dzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine psb_ld_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= dzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_d_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= dzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@ -4322,7 +4323,8 @@ subroutine psb_ld_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= dzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_d_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= dzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@ -6561,7 +6562,8 @@ subroutine psb_ld_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= dzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1

@ -600,7 +600,8 @@ subroutine psb_s_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= szero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine psb_ls_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= szero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_s_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= szero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@ -4322,7 +4323,8 @@ subroutine psb_ls_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= szero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_s_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= szero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@ -6561,7 +6562,8 @@ subroutine psb_ls_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= szero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1

@ -600,7 +600,8 @@ subroutine psb_z_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= zzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine psb_lz_coo_clean_zeros(a, info)
nzin = a%get_nzeros()
j = 0
do i=1, nzin
if (a%val(i) /= zzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1
a%val(j) = a%val(i)
a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_z_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= zzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1
@ -4322,7 +4323,8 @@ subroutine psb_lz_csc_clean_zeros(a, info)
j = a%icp(1)
do i=1, nc
do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= zzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k)
a%ia(j) = a%ia(k)
j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_z_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= zzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1
@ -6561,7 +6562,8 @@ subroutine psb_lz_csr_clean_zeros(a, info)
j = a%irp(1)
do i=1, nr
do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= zzero) then
! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k)
a%ja(j) = a%ja(k)
j = j + 1

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_scsprt.f90
! File: psb_geprt.f90
! Subroutine:
! Arguments:
@ -131,6 +131,7 @@ subroutine psb_cgeprt2(iout,a,head)
ncol = size(a,2)
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_cgeprt1(iout,a,head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
write(iout,*) nrow
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_dcsprt.f90
! File: psb_geprt.f90
! Subroutine:
! Arguments:
@ -124,13 +124,14 @@ subroutine psb_dgeprt2(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general'
! Real-valued data: one number per entry is written, so the banner must say "real".
write(iout,'(a)') '%%MatrixMarket matrix array real general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = size(a,2)
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_dgeprt1(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general'
! Real-valued data: one number per entry is written, so the banner must say "real".
write(iout,'(a)') '%%MatrixMarket matrix array real general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
write(iout,*) nrow
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_scsprt.f90
! File: psb_geprt.f90
! Subroutine:
! Arguments:
@ -124,13 +124,14 @@ subroutine psb_sgeprt2(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general'
! Real-valued data: one number per entry is written, so the banner must say "real".
write(iout,'(a)') '%%MatrixMarket matrix array real general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = size(a,2)
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_sgeprt1(iout,a,head)
character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general'
! Real-valued data: one number per entry is written, so the banner must say "real".
write(iout,'(a)') '%%MatrixMarket matrix array real general'
write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
write(iout,*) nrow
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE.
!
!
! File: psb_scsprt.f90
! File: psb_geprt.f90
! Subroutine:
! Arguments:
@ -131,6 +131,7 @@ subroutine psb_zgeprt2(iout,a,head)
ncol = size(a,2)
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_zgeprt1(iout,a,head)
write(iout,'(a)') '% '
nrow = size(a,1)
ncol = 1
write(iout,*) nrow
write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod
use psb_prec_mod
use psb_krylov_mod
use psb_linsolve_mod
use psb_objhandle_mod
use psb_prec_cbind_mod
use psb_base_string_cbind_mod

@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR)
CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR)
PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind
PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR)
PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR)
#
# Compilers and such

@ -0,0 +1,104 @@
# AX_C_OPENACC
# ------------
# Check which option needs to be passed to the C compiler to support OpenACC
# and set OPENACC_CFLAGS to that option.  OPENACC_CFLAGS is left empty when
# --disable-openacc is given, when no extra option is needed, or when no
# working option is found.
# The option is necessary at compile time (so the #pragmas are understood)
# and at link time (so the appropriate library is linked with).
#
# For each candidate option, we do a compile test first, then a link test;
# if the compile test succeeds but the link test fails, that means we have
# found the correct option but it doesn't work because the libraries are
# broken; the result is then recorded as 'unsupported' and probing stops.
#
# The raw probe result is cached in ax_cv_prog_c_openacc and is one of: the
# winning option (possibly the empty string), 'unsupported', or 'not found'.
#
# Some candidate options can be misinterpreted by compilers that do not use
# them to activate OpenACC, e.g. as "write output to a file named ...".
# _AX_OPENACC_SAFE_WD therefore aborts if files named 'penmp' or 'mp' already
# exist, and this macro removes files with those names after probing.
# NOTE(review): those two names are carried over from the OpenMP probe; with
# the candidate list used here '-openacc' would presumably be parsed as
# '-o penacc' -- confirm which file names actually need guarding.
AC_DEFUN([AX_C_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  dnl Reset the output variable by its literal name: at this point the
  dnl language has not been pushed yet, so a language-derived name would
  dnl refer to whatever language the caller happens to have selected.
  OPENACC_CFLAGS=
  if test "$enable_openacc" != no; then
    AC_LANG_PUSH([C])
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate flags, tried in this order:
      dnl   on by default           ''
      dnl   GCC                     -fopenacc
      dnl   inherited candidate     -openacc
      dnl   NVIDIA HPC SDK and PGI  -acc
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
],
          [AC_LINK_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Stop as soon as the compile step accepted a candidate, whether
        dnl or not the link step then succeeded.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl NOTE(review): the two assignments below write ac_cv_..., not the
      dnl ax_cv_... cache variable, so the cached value is never rewritten
      dnl to 'unsupported' or 'none needed'.  Left untouched on purpose:
      dnl the generated configure copies the ax_cv_ value straight into
      dnl CCOPENACC, so rewriting it would put that text into the flags.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    dnl Only a real option string may reach the output variable; every
    dnl status word, including 'not found', is filtered out here.
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
    AC_LANG_POP([C])
  fi
])
# _AX_OPENACC_SAFE_WD
# -------------------
# AC_REQUIREd by AX_C_OPENACC.  Guards the working directory against the
# probe's cleanup step, which removes files named 'penmp' and 'mp':
#   * at autoconf time, m4_syscmd runs the existence test and m4_fatal
#     aborts when either file is present;
#   * at configure time, the emitted shell test calls AC_MSG_ERROR when
#     either file is present.
AC_DEFUN([_AX_OPENACC_SAFE_WD],
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
To use AX_OPENACC you must not have either of these files
at the top level of your source tree.]))])]dnl
[if test -e penmp || test -e mp; then
AC_MSG_ERROR(m4_normalize(
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
Aborting configure because one of these files already exists.]))
fi])

@ -0,0 +1,104 @@
# AX_CXX_OPENACC
# --------------
# Check which option needs to be passed to the C++ compiler to support
# OpenACC and set OPENACC_CXXFLAGS to that option.  OPENACC_CXXFLAGS is left
# empty when --disable-openacc is given, when no extra option is needed, or
# when no working option is found.
# The option is necessary at compile time (so the #pragmas are understood)
# and at link time (so the appropriate library is linked with).
#
# For each candidate option, we do a compile test first, then a link test;
# if the compile test succeeds but the link test fails, that means we have
# found the correct option but it doesn't work because the libraries are
# broken; the result is then recorded as 'unsupported' and probing stops.
#
# The raw probe result is cached in ax_cv_prog_cxx_openacc and is one of: the
# winning option (possibly the empty string), 'unsupported', or 'not found'.
#
# Some candidate options can be misinterpreted by compilers that do not use
# them to activate OpenACC, e.g. as "write output to a file named ...".
# _AX_OPENACC_SAFE_WD therefore aborts if files named 'penmp' or 'mp' already
# exist, and this macro removes files with those names after probing.
# NOTE(review): those two names are carried over from the OpenMP probe; with
# the candidate list used here '-openacc' would presumably be parsed as
# '-o penacc' -- confirm which file names actually need guarding.
AC_DEFUN([AX_CXX_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  dnl Reset the C++ output variable by its literal name.  A language-derived
  dnl name would be expanded before the C++ language is pushed and would
  dnl therefore clear the variable of the caller's current language instead.
  OPENACC_CXXFLAGS=
  if test "$enable_openacc" != no; then
    AC_LANG_PUSH([C++])
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate flags, tried in this order:
      dnl   on by default           ''
      dnl   GCC                     -fopenacc
      dnl   inherited candidate     -openacc
      dnl   NVIDIA HPC SDK and PGI  -acc
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
],
          [AC_LINK_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Stop as soon as the compile step accepted a candidate, whether
        dnl or not the link step then succeeded.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl NOTE(review): the two assignments below write ac_cv_..., not the
      dnl ax_cv_... cache variable, so the cached value is never rewritten
      dnl to 'unsupported' or 'none needed'.  Left untouched on purpose:
      dnl the generated configure copies the ax_cv_ value straight into
      dnl CXXOPENACC, so rewriting it would put that text into the flags.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    dnl Only a real option string may reach the output variable; every
    dnl status word, including 'not found', is filtered out here.
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
    AC_LANG_POP([C++])
  fi
])
dnl _AC_OPENACC_SAFE_WD
dnl ------------------
dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
dnl configure time for files that AC_OPENACC clobbers.
dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'.
dnl To use AX_OPENACC you must not have either of these files
dnl at the top level of your source tree.]))])]dnl
dnl [if test -e penmp || test -e mp; then
dnl AC_MSG_ERROR(m4_normalize(
dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
dnl Aborting configure because one of these files already exists.]))
dnl fi])

@ -0,0 +1,108 @@
# AX_FC_OPENACC
# -------------
# Check which option needs to be passed to the Fortran compiler to support
# OpenACC and set OPENACC_FCFLAGS to that option.  OPENACC_FCFLAGS is left
# empty when --disable-openacc is given, when no extra option is needed, or
# when no working option is found.
# The option is necessary at compile time (so the directives are understood)
# and at link time (so the appropriate library is linked with).
#
# For each candidate option, we do a compile test first, then a link test.
# Unlike the C and C++ probes, the Fortran test program has no _OPENACC
# preprocessor guard, so a candidate that merely compiles proves nothing:
# probing continues until a candidate both compiles and links.
#
# The raw probe result is cached in ax_cv_prog_fc_openacc and is one of: the
# winning option (possibly the empty string), 'unsupported' (some candidate
# compiled but none linked), or 'not found' (no candidate compiled).
#
# Some candidate options can be misinterpreted by compilers that do not use
# them to activate OpenACC, e.g. as "write output to a file named ...".
# _AX_OPENACC_SAFE_WD therefore aborts if files named 'penmp' or 'mp' already
# exist, and this macro removes files with those names after probing.
# NOTE(review): those two names are carried over from the OpenMP probe; with
# the candidate list used here '-openacc' would presumably be parsed as
# '-o penacc' -- confirm which file names actually need guarding.
AC_DEFUN([AX_FC_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  dnl Reset the Fortran output variable by its literal name.  A
  dnl language-derived name would be expanded before the Fortran language is
  dnl pushed and would therefore clear the variable of the caller's current
  dnl language instead.
  OPENACC_FCFLAGS=
  if test "$enable_openacc" != no; then
    AC_LANG_PUSH([Fortran])
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate flags, tried in this order:
      dnl   on by default           ''
      dnl   GCC                     -fopenacc
      dnl   inherited candidate     -openacc
      dnl   NVIDIA HPC SDK and PGI  -acc
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        dnl The probe source starts in column 7 so that it is valid both as
        dnl fixed-form and as free-form Fortran.
        AC_COMPILE_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
],
          [AC_LINK_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Leave the loop only on success.  Both status words mean that
        dnl this candidate failed, so the next one must still be tried;
        dnl breaking on 'not found' would end the probe after the first
        dnl candidate whenever it fails to compile.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
           test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl NOTE(review): the two assignments below write ac_cv_..., not the
      dnl ax_cv_... cache variable, so the cached value is never rewritten
      dnl to 'unsupported' or 'none needed'.  Left untouched on purpose:
      dnl the generated configure copies the ax_cv_ value straight into
      dnl FCOPENACC, so rewriting it would put that text into the flags.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ac_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    dnl Only a real option string may reach the output variable; every
    dnl status word, including 'not found', is filtered out here.
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
    AC_LANG_POP([Fortran])
  fi
])
# _AX_OPENACC_SAFE_WD
# -------------------
# AC_REQUIREd by AX_FC_OPENACC.  Guards the working directory against the
# probe's cleanup step, which removes files named 'penmp' and 'mp':
#   * at autoconf time, m4_syscmd runs the existence test and m4_fatal
#     aborts when either file is present;
#   * at configure time, the emitted shell test calls AC_MSG_ERROR when
#     either file is present.
AC_DEFUN([_AX_OPENACC_SAFE_WD],
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
To use AX_OPENACC you must not have either of these files
at the top level of your source tree.]))])]dnl
[if test -e penmp || test -e mp; then
AC_MSG_ERROR(m4_normalize(
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
Aborting configure because one of these files already exists.]))
fi])

@ -2157,6 +2157,31 @@ CPPFLAGS="$SAVE_CPPFLAGS"
dnl @synopsis PAC_ARG_CUDA
dnl
dnl Declare the --enable-cuda / --disable-cuda configure switch.
dnl
dnl Sets pac_cv_cuda to "yes" when the switch is given with any value other
dnl than "no", and to "no" for --disable-cuda or --enable-cuda=no.  When the
dnl switch is not given at all, pac_cv_cuda is left untouched, so a value
dnl assigned earlier is preserved.
dnl
dnl Example use:
dnl
dnl   PAC_ARG_CUDA()
dnl   if test "x$pac_cv_cuda" = "xyes"; then ... fi
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_CUDA],
[AC_MSG_CHECKING([whether we want cuda ])
AC_ARG_ENABLE([cuda],
  [AS_HELP_STRING([--enable-cuda],
    [Specify whether to enable cuda. ])],
  [
    # Honour the value passed by the user: --disable-cuda must not turn
    # the feature on.
    if test x"$enableval" = x"no" ; then
      pac_cv_cuda="no";
    else
      pac_cv_cuda="yes";
    fi
  ]
)
# Close the "checking ..." line opened above; an unset value reads as "no".
AC_MSG_RESULT([${pac_cv_cuda:-no}])
]
)
dnl @synopsis PAC_CHECK_CUDA
dnl
@ -2173,7 +2198,7 @@ dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN(PAC_CHECK_CUDA,
[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
[AC_ARG_WITH(cudadir, AC_HELP_STRING([--with-cudadir=DIR], [Specify the CUDA install directory.]),
[pac_cv_cuda_dir=$withval],
[pac_cv_cuda_dir=''])
@ -2234,56 +2259,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo
[pac_cv_cudacc=''])
])
dnl @synopsis PAC_ARG_WITH_LIBRSB
dnl
dnl Declare --with-librsb=DIR and look for rsb.h directly inside DIR.
dnl
dnl When the header is found, sets RSB_LIBS, RSBLIBNAME, LIBRSB_DIR,
dnl LIBRSB_INCDIR, LIBRSB_INCLUDES, LIBRSB_LIBS, LIBRSB_DEFINES and LRSB, and
dnl prepends the LIBRSB define to FDEFINES and CDEFINES.  When it is not
dnl found, RSB_INCLUDES is cleared.  LIBS and CPPFLAGS are restored to their
dnl entry values before returning.
dnl
AC_DEFUN([PAC_ARG_WITH_LIBRSB],
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
AC_ARG_WITH(librsb,
AS_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
--with-librsb=/opt/packages/librsb]),
[pac_cv_librsb_dir=$withval],
[pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
  LIBS="-L$pac_cv_librsb_dir $LIBS"
  RSB_INCLUDES="-I$pac_cv_librsb_dir"
  RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
[pac_rsb_header_ok=yes],
[pac_rsb_header_ok=no; RSB_INCLUDES=""])
# Use the portable single "=" string comparison here.
if test "x$pac_rsb_header_ok" = "xyes" ; then
  RSB_LIBS="-lrsb $RSB_LIBDIR"
  RSBLIBNAME="librsb.a";
  LIBRSB_DIR="$pac_cv_librsb_dir";
  LIBRSB_INCDIR="$LIBRSB_DIR";
  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
  LIBRSB_DEFINES="-DHAVE_RSB";
  LRSB=-lpsb_rsb
  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
  CDEFINES="$LIBRSB_DEFINES $CDEFINES";
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl
@ -2336,3 +2311,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
])dnl
dnl @synopsis PAC_ARG_OPENACC
dnl
dnl Declare the --enable-openacc / --disable-openacc configure switch.
dnl
dnl Sets pac_cv_openacc to "yes" when the switch is given with any value
dnl other than "no".  In every other case, including --disable-openacc and
dnl the switch not being given while pac_cv_openacc is not already "yes",
dnl pac_cv_openacc ends up as "no".  The outcome is reported on the
dnl "checking whether we want openacc" line.
dnl
dnl Example use:
dnl
dnl   PAC_ARG_OPENACC()
dnl   if test x"$pac_cv_openacc" = x"yes" ; then ... fi
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_OPENACC],
[AC_MSG_CHECKING([whether we want openacc ])
AC_ARG_ENABLE([openacc],
  [AS_HELP_STRING([--enable-openacc],
    [Specify whether to enable openacc. ])],
  [
    # Honour the value passed by the user: --disable-openacc must not turn
    # the feature on.
    if test x"$enableval" = x"no" ; then
      pac_cv_openacc="no";
    else
      pac_cv_openacc="yes";
    fi
  ]
)
if test x"$pac_cv_openacc" = x"yes" ; then
  AC_MSG_RESULT([yes.])
else
  pac_cv_openacc="no";
  AC_MSG_RESULT([no.])
fi
]
)

530
configure vendored

@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE
LTLIBOBJS
LIBOBJS
LRSB
LIBRSB_DEFINES
LIBRSB_DIR
LIBRSB_INCDIR
LIBRSB_INCLUDES
LIBRSB_LIBS
LCUDA
CUDALD
CUDAD
@ -662,11 +668,19 @@ CUDA_SHORT_VERSION
CUDA_VERSION
CUDA_LIBS
CUDA_INCLUDES
CXXCUDEFINES
CCUDEFINES
FCUDEFINES
CUDA_DEFINES
CUDA_DIR
EXTRALDLIBS
SPGPU_LIBS
SPGPU_FLAGS
CXXOPENACC
CCOPENACC
FCOPENACC
OACCLD
OACCD
METISINCFILE
UTILLIBNAME
METHDLIBNAME
@ -838,8 +852,14 @@ with_amd
with_amddir
with_amdincdir
with_amdlibdir
with_cuda
enable_cuda
with_cudadir
with_cudacc
enable_openacc
with_extraopenacc
with_ccopenacc
with_cxxopenacc
with_fcopenacc
'
ac_precious_vars='build_alias
host_alias
@ -1490,6 +1510,9 @@ Optional Features:
--disable-silent-rules verbose build output (undo: "make V=0")
--enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP
--enable-cuda Specify whether to enable cuda.
--enable-openacc Specify whether to enable openacc.
--disable-openacc do not use Openacc
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@ -1532,9 +1555,17 @@ Optional Packages:
--with-amddir=DIR Specify the directory for AMD library and includes.
--with-amdincdir=DIR Specify the directory for AMD includes.
--with-amdlibdir=DIR Specify the directory for AMD library.
--with-cuda=DIR Specify the CUDA install directory.
--with-cudadir=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75
--with-extraopenacc additional [EXTRAOPENACC] flags to be added: will
prepend to [EXTRAOPENACC]
--with-ccopenacc additional [CCOPENACC] flags to be added: will
prepend to [CCOPENACC]
--with-cxxopenacc additional [CXXOPENACC] flags to be added: will
prepend to [CXXOPENACC]
--with-fcopenacc additional [FCOPENACC] flags to be added: will
prepend to [FCOPENACC]
Some influential environment variables:
FC Fortran compiler command
@ -5825,7 +5856,7 @@ fi
# we just gave the user the chance to append values to these variables
###############################################################################
if test -n "$ac_tool_prefix"; then
@ -7392,7 +7423,7 @@ fi
##############################################################################
BASEMODNAME=psb_base_mod
PRECMODNAME=psb_prec_mod
METHDMODNAME=psb_krylov_mod
METHDMODNAME=psb_linsolve_mod
UTILMODNAME=psb_util_mod
if test "X$psblas_cv_fc" == X"cray"
@ -7402,7 +7433,7 @@ then
FIFLAG="-I"
BASEMODNAME=PSB_BASE_MOD
PRECMODNAME=PSB_PREC_MOD
METHDMODNAME=PSB_KRYLOV_MOD
METHDMODNAME=PSB_LINSOLVE_MOD
UTILMODNAME=PSB_UTIL_MOD
else
@ -10602,12 +10633,26 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want cuda " >&5
printf %s "checking whether we want cuda ... " >&6; }
# Check whether --enable-cuda was given.
if test ${enable_cuda+y}
then :
enableval=$enable_cuda;
pac_cv_cuda="yes";
fi
if test "x$pac_cv_cuda" == "xyes"; then
# Check whether --with-cuda was given.
if test ${with_cuda+y}
# Check whether --with-cudadir was given.
if test ${with_cudadir+y}
then :
withval=$with_cuda; pac_cv_cuda_dir=$withval
withval=$with_cudadir; pac_cv_cuda_dir=$withval
else $as_nop
pac_cv_cuda_dir=''
fi
@ -10693,10 +10738,10 @@ SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then
# Check whether --with-cuda was given.
if test ${with_cuda+y}
# Check whether --with-cudadir was given.
if test ${with_cudadir+y}
then :
withval=$with_cuda; pac_cv_cuda_dir=$withval
withval=$with_cudadir; pac_cv_cuda_dir=$withval
else $as_nop
pac_cv_cuda_dir=''
fi
@ -10814,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
HAVE_CUDA="yes";
CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
@ -10837,7 +10883,7 @@ fi
if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75";
pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
@ -10849,24 +10895,432 @@ fi
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FCUDEFINES=" ${CUDA_DEFINES}"
CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi
fi
if test "x$pac_cv_ipk_size" != "x4"; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5
printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;}
pac_cv_cuda="no";
HAVE_CUDA="no";
CUDA_CC="";
SPGPU_LIBS="";
CUDAD="";
CUDALD="";
CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
printf %s "checking whether we want openacc ... " >&6; }
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
pac_cv_openacc="yes";
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
printf "%s\n" "yes." >&6; }
# AC_LANG_PUSH([Fortran])
# AC_OPENACC()
# pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C])
# AC_OPENACC()
# pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
# AC_LANG_POP()
# AC_LANG_PUSH([C++])
# AC_OPENACC()
# pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
# AC_LANG_POP()
else
pac_cv_openacc="no";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
printf "%s\n" "no." >&6; }
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-extraopenacc was given.
if test ${with_extraopenacc+y}
then :
withval=$with_extraopenacc;
EXTRAOPENACC="${withval} ${EXTRAOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5
printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
if test -e penmp || test -e mp; then
as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5
fi
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5
printf %s "checking for $CC option to support Openacc... " >&6; }
if test ${ax_cv_prog_c_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_c_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $ac_option"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ax_cv_prog_c_openacc=$ac_option
else $as_nop
ax_cv_prog_c_openacc='unsupported'
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
CFLAGS=$ac_save_CFLAGS
if test "$ax_cv_prog_c_openacc" != 'not found'; then
break
fi
done
if test "$ax_cv_prog_c_openacc" = 'not found'; then
ac_cv_prog_c_openacc='unsupported'
elif test "$ax_cv_prog_c_openacc" = ''; then
ac_cv_prog_c_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5
printf "%s\n" "$ax_cv_prog_c_openacc" >&6; }
if test "$ax_cv_prog_c_openacc" != 'unsupported' && \
test "$ax_cv_prog_c_openacc" != 'none needed'; then
OPENACC_CFLAGS="$ax_cv_prog_c_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
CCOPENACC="$ax_cv_prog_c_openacc";
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=cpp
ac_cpp='$CXXCPP $CPPFLAGS'
ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5
printf %s "checking for $CXX option to support Openacc... " >&6; }
if test ${ax_cv_prog_cxx_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_cxx_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_CXXFLAGS=$CXXFLAGS
CXXFLAGS="$CXXFLAGS $ac_option"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
_ACEOF
if ac_fn_cxx_try_compile "$LINENO"
then :
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
_ACEOF
if ac_fn_cxx_try_link "$LINENO"
then :
ax_cv_prog_cxx_openacc=$ac_option
else $as_nop
ax_cv_prog_cxx_openacc='unsupported'
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
CXXFLAGS=$ac_save_CXXFLAGS
if test "$ax_cv_prog_cxx_openacc" != 'not found'; then
break
fi
done
if test "$ax_cv_prog_cxx_openacc" = 'not found'; then
ac_cv_prog_cxx_openacc='unsupported'
elif test "$ax_cv_prog_cxx_openacc" = ''; then
ac_cv_prog_cxx_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5
printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; }
if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \
test "$ax_cv_prog_cxx_openacc" != 'none needed'; then
OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
CXXOPENACC="$ax_cv_prog_cxx_openacc";
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=${ac_fc_srcext-f}
ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_fc_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5
printf %s "checking for $FC option to support Openacc... " >&6; }
if test ${ax_cv_prog_fc_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_fc_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_FCFLAGS=$FCFLAGS
FCFLAGS="$FCFLAGS $ac_option"
cat > conftest.$ac_ext <<_ACEOF
program main
use openacc
implicit none
integer tid, np
tid = 42
call acc_init(0)
end
_ACEOF
if ac_fn_fc_try_compile "$LINENO"
then :
cat > conftest.$ac_ext <<_ACEOF
program main
use openacc
implicit none
integer tid, np
tid = 42
call acc_init(0)
end
_ACEOF
if ac_fn_fc_try_link "$LINENO"
then :
ax_cv_prog_fc_openacc=$ac_option
else $as_nop
ax_cv_prog_fc_openacc='unsupported'
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
FCFLAGS=$ac_save_FCFLAGS
if test "$ax_cv_prog_fc_openacc" != 'unsupported'; then
break
fi
done
if test "$ax_cv_prog_fc_openacc" = 'not found'; then
ac_cv_prog_fc_openacc='unsupported'
elif test "$ax_cv_prog_fc_openacc" = ''; then
ac_cv_prog_fc_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5
printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; }
if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \
test "$ax_cv_prog_fc_openacc" != 'none needed'; then
OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
FCOPENACC="$ax_cv_prog_fc_openacc";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-ccopenacc was given.
if test ${with_ccopenacc+y}
then :
withval=$with_ccopenacc;
CCOPENACC="${withval} ${CCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-cxxopenacc was given.
if test ${with_cxxopenacc+y}
then :
withval=$with_cxxopenacc;
CXXOPENACC="${withval} ${CXXOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-fcopenacc was given.
if test ${with_fcopenacc+y}
then :
withval=$with_fcopenacc;
FCOPENACC="${withval} ${FCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
CCOPENACC="$CCOPENACC $EXTRAOPENACC";
CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
FCOPENACC="$FCOPENACC $EXTRAOPENACC";
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
###############################################################################
LIBRSB_DIR="$pac_cv_librsb_dir";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5
printf %s "checking for LIBRSB install dir... " >&6; }
case $LIBRSB_DIR in
/*) ;;
*) esac
pac_cv_status_file="$LIBRSB_DIR/librsb.a"
if test ! -f "$pac_cv_status_file" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
#AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.])
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5
printf "%s\n" "$LIBRSB_DIR" >&6; }
RSBTARGETLIB=rsbd;
RSBTARGETOBJ=rsbobj;
fi
@ -10878,7 +11332,7 @@ fi
LIBDIR=lib
BASELIBNAME=libpsb_base.a
PRECLIBNAME=libpsb_prec.a
METHDLIBNAME=libpsb_krylov.a
METHDLIBNAME=libpsb_linsolve.a
UTILLIBNAME=libpsb_util.a
###############################################################################
@ -10928,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a
PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
@ -10961,6 +11415,22 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
@ -12299,6 +12769,11 @@ fi
CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
@ -12332,6 +12807,11 @@ printf "%s\n" "$as_me:
CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
@ -12348,6 +12828,16 @@ printf "%s\n" "$as_me:
If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise
type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}.
" >&6;}
# Warn when OpenACC is enabled but FCOPENACC carries no actual compiler flag.
# FCOPENACC is assembled earlier in this script by joining values with blanks
# (FCOPENACC="$FCOPENACC $EXTRAOPENACC"), so it can be non-empty while holding
# only whitespace.  Collapse it through word splitting before testing, otherwise
# the emptiness check can never succeed.  POSIX test(1) defines only '=' for
# string equality, so '==' is avoided.
if test x"${pac_cv_openacc}" = x"yes" ; then
psb_fcopenacc_words=`printf '%s' ${FCOPENACC}`
if test x"${psb_fcopenacc_words}" = x ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}:
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= " >&5
printf "%s\n" "$as_me:
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= " >&6;}
fi
fi
###############################################################################

@ -483,7 +483,7 @@ fi
##############################################################################
BASEMODNAME=psb_base_mod
PRECMODNAME=psb_prec_mod
METHDMODNAME=psb_krylov_mod
METHDMODNAME=psb_linsolve_mod
UTILMODNAME=psb_util_mod
if test "X$psblas_cv_fc" == X"cray"
@ -493,7 +493,7 @@ then
FIFLAG="-I"
BASEMODNAME=PSB_BASE_MOD
PRECMODNAME=PSB_PREC_MOD
METHDMODNAME=PSB_KRYLOV_MOD
METHDMODNAME=PSB_LINSOLVE_MOD
UTILMODNAME=PSB_UTIL_MOD
else
@ -795,16 +795,19 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
fi
PAC_ARG_CUDA()
if test "x$pac_cv_cuda" == "xyes"; then
PAC_CHECK_CUDA()
if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_CHECK_CUDA_VERSION()
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
dnl PAC_CHECK_SPGPU()
HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu";
CUDAD=cudad;
CUDALD=cudald;
@ -814,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_ARG_WITH_CUDACC()
if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75";
pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc";
fi
if (( $pac_cv_cuda_version >= 11070 ))
@ -826,27 +829,72 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done
if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FCUDEFINES=" ${CUDA_DEFINES}"
CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi
fi
if test "x$pac_cv_ipk_size" != "x4"; then
AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU])
pac_cv_cuda="no";
HAVE_CUDA="no";
CUDA_CC="";
SPGPU_LIBS="";
CUDAD="";
CUDALD="";
CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES="";
CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi
fi
dnl OpenACC configuration.
dnl PAC_ARG_OPENACC is defined outside this file section; the code below only
dnl relies on pac_cv_openacc being "yes" when OpenACC support was requested
dnl (presumably set by that macro -- confirm against its definition).
PAC_ARG_OPENACC()
dnl AC_ARG_ENABLE([openacc],
dnl [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])
if test x"$pac_cv_openacc" == x"yes" ; then
dnl Optional extra flags, later appended to the C, C++ and Fortran OpenACC flags.
PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC)
dnl if test false; then
dnl Seed each per-language flag variable from the corresponding ax_cv_prog_*_openacc
dnl value (assumed to be filled in by the AX_*_OPENACC probe just before it).
AX_C_OPENACC()
CCOPENACC="$ax_cv_prog_c_openacc";
AX_CXX_OPENACC()
CXXOPENACC="$ax_cv_prog_cxx_openacc";
AX_FC_OPENACC()
FCOPENACC="$ax_cv_prog_fc_openacc";
dnl AX_OPENACC()
dnl
dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";
dnl FCOPENACC="$ax_cv_prog_fc_openacc";
dnl else
dnl AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
dnl User-supplied --with-ccopenacc / --with-cxxopenacc / --with-fcopenacc values
dnl are handled here, after the probed defaults have been stored.
PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
dnl AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
dnl CCOPENACC="$ax_cv_prog_c_openacc";
dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";
dnl FCOPENACC="$ax_cv_prog_fc_openacc";
dnl fi
dnl Append the extra flags.  NOTE(review): the joining blank means each variable
dnl is never an empty string after this point, even when no flag was found.
CCOPENACC="$CCOPENACC $EXTRAOPENACC";
CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
FCOPENACC="$FCOPENACC $EXTRAOPENACC";
dnl AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])
dnl OACCD / OACCLD are exported through AC_SUBST further down; they are only
dnl given a value when OpenACC is enabled.
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
###############################################################################
PAC_ARG_WITH_LIBRSB
dnl PAC_ARG_WITH_LIBRSB()
LIBRSB_DIR="$pac_cv_librsb_dir";
AC_MSG_CHECKING([for LIBRSB install dir])
case $LIBRSB_DIR in
@ -878,7 +926,7 @@ fi
LIBDIR=lib
BASELIBNAME=libpsb_base.a
PRECLIBNAME=libpsb_prec.a
METHDLIBNAME=libpsb_krylov.a
METHDLIBNAME=libpsb_linsolve.a
UTILLIBNAME=libpsb_util.a
###############################################################################
@ -928,9 +976,9 @@ AC_SUBST(FINCLUDES)
PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they?
@ -952,6 +1000,12 @@ AC_SUBST(PRECLIBNAME)
AC_SUBST(METHDLIBNAME)
AC_SUBST(UTILLIBNAME)
AC_SUBST(METISINCFILE)
AC_SUBST(OACCD)
AC_SUBST(OACCLD)
AC_SUBST(FCOPENACC)
AC_SUBST(CCOPENACC)
AC_SUBST(CXXOPENACC)
AC_SUBST(SPGPU_FLAGS)
AC_SUBST(SPGPU_LIBS)
dnl AC_SUBST(SPGPU_DIR)
@ -960,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
AC_SUBST(EXTRALDLIBS)
AC_SUBST(CUDA_DIR)
AC_SUBST(CUDA_DEFINES)
AC_SUBST(FCUDEFINES)
AC_SUBST(CCUDEFINES)
AC_SUBST(CXXCUDEFINES)
AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_VERSION)
@ -976,7 +1033,6 @@ AC_SUBST(LIBRSB_DIR)
AC_SUBST(LIBRSB_DEFINES)
AC_SUBST(LRSB)
###############################################################################
# the following files will be created by Automake
@ -1004,6 +1060,11 @@ AC_MSG_NOTICE([
CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis}
@ -1023,6 +1084,13 @@ dnl Note : we should use LDLIBS sooner or later!
dnl To install the program and its documentation, run 'make install' if you are root,
dnl or run 'su -c "make install"' if you are not root.
])
dnl Warn when OpenACC is enabled but FCOPENACC carries no actual compiler flag.
dnl FCOPENACC is built earlier as "$FCOPENACC $EXTRAOPENACC", so it can be
dnl non-empty while holding only whitespace; collapse it through word splitting
dnl before testing, otherwise the emptiness check can never succeed.
dnl POSIX test defines only = for string equality, so == is avoided.
if test x"${pac_cv_openacc}" = x"yes" ; then
psb_fcopenacc_words=`printf '%s' ${FCOPENACC}`
if test x"${psb_fcopenacc_words}" = x ; then
AC_MSG_NOTICE([
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= ])
fi
fi
###############################################################################

@ -141,3 +141,12 @@ spgpuclean:
$(MAKE) -C spgpu clean
veryclean: clean
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
}
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i);
}
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i);
}
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

@ -37,6 +37,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
#include "vector.h"
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);

@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasCgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1);
(const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
(const cuComplex *) beta, y->v_,1);
} else {
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_);
(const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
(const cuComplex *) beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)
@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) {
status = cublasZgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1);
(const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
(const cuDoubleComplex *) beta, y->v_,1);
} else {
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_);
(const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
(const cuDoubleComplex *) beta, y->v_,y->pitch_);
}
if (status == CUBLAS_STATUS_SUCCESS)

@ -220,7 +220,8 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,
devVecA->count_,devVecB->pitch_);
return(i);
}

@ -35,6 +35,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
#include "vector.h"
int registerMappedDouble(void *, void **, int, double);
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);

@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode,
sizeof(cusparseFillMode_t)));
@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{
T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype,
sizeof(cusparseDiagType_t)));

@ -295,3 +295,12 @@ lib: objs
clean:
/bin/rm -f $(OBJS)
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -35,6 +35,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
#include "vector.h"
int registerMappedInt(void *, void **, int, int);
int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);

@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine c_cuda_set_scal
!!$
!!$ subroutine c_cuda_set_vect(x,val)
!!$ class(psb_c_vect_cuda), intent(inout) :: x
!!$ complex(psb_spk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_c_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine c_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = czero
@ -844,9 +831,6 @@ contains
! TYPE psb_c_vect
!
select type(yy => y)
type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_c_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
call x%sync()
if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@ -870,10 +854,10 @@ contains
complex(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot
complex(psb_spk_), external :: cdot
if (x%is_dev()) call x%sync()
res = ddot(n,y,1,x%v,1)
res = cdot(n,y,1,x%v,1)
end function c_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_c_cuda_multivect_mod
end type psb_c_multivect_cuda
public :: psb_c_multivect_cuda
private :: constructor
private :: mconstructor
interface psb_c_multivect_cuda
module procedure constructor
module procedure mconstructor
end interface
contains
function constructor(x) result(this)
function mconstructor(x) result(this)
complex(psb_spk_) :: x(:,:)
type(psb_c_multivect_cuda) :: this
integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
end function constructor
end function mconstructor
!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine d_cuda_set_scal
!!$
!!$ subroutine d_cuda_set_vect(x,val)
!!$ class(psb_d_vect_cuda), intent(inout) :: x
!!$ real(psb_dpk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_d_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine d_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_d_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res
real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = dzero
@ -844,9 +831,6 @@ contains
! TYPE psb_d_vect
!
select type(yy => y)
type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_d_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
call x%sync()
if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@ -1393,14 +1377,14 @@ module psb_d_cuda_multivect_mod
end type psb_d_multivect_cuda
public :: psb_d_multivect_cuda
private :: constructor
private :: mconstructor
interface psb_d_multivect_cuda
module procedure constructor
module procedure mconstructor
end interface
contains
function constructor(x) result(this)
function mconstructor(x) result(this)
real(psb_dpk_) :: x(:,:)
type(psb_d_multivect_cuda) :: this
integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
end function constructor
end function mconstructor
!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -795,18 +795,6 @@ contains
call x%set_dev()
end subroutine i_cuda_set_scal
!!$
!!$ subroutine i_cuda_set_vect(x,val)
!!$ class(psb_i_vect_cuda), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_i_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine i_cuda_set_vect
@ -949,14 +937,14 @@ module psb_i_cuda_multivect_mod
end type psb_i_multivect_cuda
public :: psb_i_multivect_cuda
private :: constructor
private :: mconstructor
interface psb_i_multivect_cuda
module procedure constructor
module procedure mconstructor
end interface
contains
function constructor(x) result(this)
function mconstructor(x) result(this)
integer(psb_ipk_) :: x(:,:)
type(psb_i_multivect_cuda) :: this
integer(psb_ipk_) :: info
@ -964,7 +952,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
end function constructor
end function mconstructor
!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine s_cuda_set_scal
!!$
!!$ subroutine s_cuda_set_vect(x,val)
!!$ class(psb_s_vect_cuda), intent(inout) :: x
!!$ real(psb_spk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_s_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine s_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_s_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
real(psb_spk_), external :: ddot
integer(psb_ipk_) :: info
res = szero
@ -844,9 +831,6 @@ contains
! TYPE psb_s_vect
!
select type(yy => y)
type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_s_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
call x%sync()
if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@ -870,10 +854,10 @@ contains
real(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res
real(psb_spk_), external :: ddot
real(psb_spk_), external :: sdot
if (x%is_dev()) call x%sync()
res = ddot(n,y,1,x%v,1)
res = sdot(n,y,1,x%v,1)
end function s_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_s_cuda_multivect_mod
end type psb_s_multivect_cuda
public :: psb_s_multivect_cuda
private :: constructor
private :: mconstructor
interface psb_s_multivect_cuda
module procedure constructor
module procedure mconstructor
end interface
contains
function constructor(x) result(this)
function mconstructor(x) result(this)
real(psb_spk_) :: x(:,:)
type(psb_s_multivect_cuda) :: this
integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
end function constructor
end function mconstructor
!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -813,18 +813,6 @@ contains
call x%set_dev()
end subroutine z_cuda_set_scal
!!$
!!$ subroutine z_cuda_set_vect(x,val)
!!$ class(psb_z_vect_cuda), intent(inout) :: x
!!$ complex(psb_dpk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_z_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine z_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_z_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info
res = zzero
@ -844,9 +831,6 @@ contains
! TYPE psb_z_vect
!
select type(yy => y)
type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_z_vect_cuda)
if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default
! y%sync is done in dot_a
call x%sync()
if (x%is_dev()) call x%sync()
res = y%dot(n,x%v)
end select
@ -870,10 +854,10 @@ contains
complex(psb_dpk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot
complex(psb_dpk_), external :: zdot
if (x%is_dev()) call x%sync()
res = ddot(n,y,1,x%v,1)
res = zdot(n,y,1,x%v,1)
end function z_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_z_cuda_multivect_mod
end type psb_z_multivect_cuda
public :: psb_z_multivect_cuda
private :: constructor
private :: mconstructor
interface psb_z_multivect_cuda
module procedure constructor
module procedure mconstructor
end interface
contains
function constructor(x) result(this)
function mconstructor(x) result(this)
complex(psb_dpk_) :: x(:,:)
type(psb_z_multivect_cuda) :: this
integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x
call this%asb(size(x,1),size(x,2),info)
end function constructor
end function mconstructor
!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
{
#endif
#ifdef ASSUME_LOCK_SYNC_PARALLELISM
volatile float* vsSum = sSum;
vsSum[threadIdx.x] = res;

@ -220,7 +220,8 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle();
spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,
devVecA->count_,devVecB->pitch_);
return(i);
}

@ -35,6 +35,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
#include "vector.h"
int registerMappedFloat(void *, void **, int, float);
int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);

@ -34,6 +34,7 @@
#include "cuda_runtime.h"
//#include "common.h"
//#include "cintrf.h"
#include "cuda_util.h"
#include <complex.h>
struct MultiVectDevice

@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
}
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i);
}
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i);
}
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0;
spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -223,7 +223,8 @@ int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
return(i);
}
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB)
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
void* devMultiVecA, void* devMultiVecB)
{int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;

@ -37,6 +37,7 @@
#include "vectordev.h"
#include "cuda_runtime.h"
#include "core.h"
#include "vector.h"
int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
int indexBase, cuDoubleComplex beta);
int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
void* devVecA, void* devVecB);

@ -1,7 +1,7 @@
all: guide
guide:
cd src && $(MAKE)
cd src && $(MAKE) clean all
doxy:
doxygen doxypsb

@ -52,7 +52,7 @@ PROJECT_LOGO =
# If a relative path is entered, it will be relative to the location
# where doxygen was started. If left blank the current directory will be used.
OUTPUT_DIRECTORY = ../../psblas-3.4-doxygen
OUTPUT_DIRECTORY = ../../psblas-3.9-doxygen
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
# 4096 sub-directories (in 2 levels) under the output directory of each output

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

@ -10,16 +10,16 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 91--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br
<!--l. 99--><p class="noindent" ><span
class="pplb7t-x-x-172">PSBLAS</span><br
class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br
class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span
class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br
class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br
class="pplb7t-">Salvatore Filippone</span><br
class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br
class="pplb7t-">Alfredo Buttari </span><br
class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
</div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

@ -1,33 +1,62 @@
/* start css.sty */
.cmr-7{font-size:70%;}
.cmmi-5{font-size:50%;font-style: italic;}
.cmmi-7{font-size:70%;font-style: italic;}
.cmmi-10{font-style: italic;}
.cmsy-7{font-size:70%;}
.cmbx-12x-x-144{font-size:172%; font-weight: bold;}
.cmbx-12x-x-144{ font-weight: bold;}
.cmbx-12x-x-144{ font-weight: bold;}
.cmti-10{ font-style: italic;}
.cmti-12{font-size:120%; font-style: italic;}
.cmbx-10{ font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.pplb7t-x-x-172{font-size:172%;font-weight: bold;}
.pplb7t-x-x-172{font-weight: bold;}
.pplb7t-x-x-172{font-weight: bold;}
.pplri7t-{font-style: italic;}
.pplri7t-{font-style: italic;}
.pplri7t-x-x-120{font-size:120%;font-style: italic;}
.pplri7t-x-x-120{font-style: italic;}
.pplb7t-{font-weight: bold;}
.pplb7t-{font-weight: bold;}
.pplb7t-{font-weight: bold;}
.cmtt-10{font-family: monospace,monospace;}
.cmtt-10{font-family: monospace,monospace;}
.cmtt-10{font-family: monospace,monospace;}
.cmr-9{font-size:90%;}
.cmr-8{font-size:80%;}
.cmbx-12{font-size:120%; font-weight: bold;}
.cmbx-12{ font-weight: bold;}
.cmbx-12{ font-weight: bold;}
.pplr7t-x-x-76{font-size:76%;}
.zplmr7m-{font-style: italic;}
.zplmr7m-{font-style: italic;}
.zplmr7m-{font-style: italic;}
.zplmr7m-{font-style: italic;}
.zplmr7m-{font-style: italic;}
.zplmr7m-x-x-76{font-size:76%;font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-60{font-size:60%;font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7y-x-x-76{font-size:76%;}
.zplmr7t-x-x-76{font-size:76%;}
.pplr7t-x-x-90{font-size:90%;}
.pplr7t-x-x-80{font-size:80%;}
.pplb7t-x-x-120{font-size:120%;font-weight: bold;}
.pplb7t-x-x-120{font-weight: bold;}
.pplb7t-x-x-120{font-weight: bold;}
.cmtt-8{font-size:80%;font-family: monospace,monospace;}
.cmtt-8{font-family: monospace,monospace;}
.cmtt-8{font-family: monospace,monospace;}
.cmtt-9{font-size:90%;font-family: monospace,monospace;}
.cmtt-9{font-family: monospace,monospace;}
.cmtt-9{font-family: monospace,monospace;}
.cmmi-8{font-size:80%;font-style: italic;}
.pplr7t-x-x-70{font-size:70%;}
.zplmr7m-x-x-90{font-size:90%;font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7y-x-x-90{font-size:90%;}
.zplmr7m-x-x-80{font-size:80%;font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7t-x-x-80{font-size:80%;}
.pplrc7t-x-x-90{font-size:90%;}
.small-caps{font-variant: small-caps; }
p{margin-top:0;margin-bottom:0}
p.indent{text-indent:0;}
p + p{margin-top:1em;}
@ -158,5 +187,11 @@ pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top
pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
.lstinputlisting .label{margin-right:0.5em;}
#TBL-24-1{border-left: 1px solid black;}
#TBL-24-1{border-right:1px solid black;}
#TBL-24-2{border-right:1px solid black;}
#TBL-24-3{border-right:1px solid black;}
#TBL-24-4{border-right:1px solid black;}
#TBL-24-5{border-right:1px solid black;}
/* end css.sty */

@ -10,16 +10,16 @@
<link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body
>
<!--l. 91--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br
<!--l. 99--><p class="noindent" ><span
class="pplb7t-x-x-172">PSBLAS</span><br
class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br
class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span
class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br
class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br
class="pplb7t-">Salvatore Filippone</span><br
class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br
class="pplb7t-">Alfredo Buttari </span><br
class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
</div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

@ -14,6 +14,6 @@
<!--l. 72--><p class="indent" > <span class="footnote-mark"><a
id="fn4x0"><a
id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span
class="cmr-8">The string is case-insensitive</span></div>
class="pplr7t-x-x-80">The string is case-insensitive</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 968 B

After

Width:  |  Height:  |  Size: 1021 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -13,8 +13,11 @@
<div class="footnote-text">
<!--l. 53--><p class="noindent" ><span class="footnote-mark"><a
id="fn5x0"><a
id="x18-143004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="cmr-8">Note: the implementation is for </span><span
class="cmmi-8">FCG</span><span
class="cmr-8">(1).</span></div>
id="x18-144004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="pplr7t-x-x-80">Note: the implementation is for </span><span
class="zplmr7m-x-x-80">FCG</span><span
class="zplmr7t-x-x-80">(</span><span
class="pplr7t-x-x-80">1</span><span
class="zplmr7t-x-x-80">)</span><span
class="pplr7t-x-x-80">.</span></div>
</body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.5 KiB

After

Width:  |  Height:  |  Size: 8.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 970 B

After

Width:  |  Height:  |  Size: 985 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 420 B

After

Width:  |  Height:  |  Size: 399 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 710 B

After

Width:  |  Height:  |  Size: 700 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1016 B

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save