Merge branch 'cmake2' into cmake

cmake
Luca Pepè Sciarria 2 months ago
commit ef71a32484

@ -0,0 +1,12 @@
$Format:%d%n%n$
# Fall back version, probably last release:
3.9.0
# PSBLAS version file.
#
# Release archive created from commit:
# $Format:%H %d$
# $Format:Created on %ci by %cN, and$
# $Format:signed by %GS using %GK.$
# $Format:Signature status: %G?$
$Format:%GG$

1
.gitignore vendored

@ -1,6 +1,7 @@
*.a *.a
*.o *.o
*.mod *.mod
*.smod
*~ *~
# header files generated # header files generated

@ -430,7 +430,11 @@ endif()
#--------------------------------------- #---------------------------------------
# Link order, left to right: # Link order, left to right:
<<<<<<< HEAD
# cbind.a, util.a krylov.a prec.a base.a # cbind.a, util.a krylov.a prec.a base.a
=======
# cbind.a, util.a linsolve.a prec.a base.a
>>>>>>> cmake2
include(${CMAKE_CURRENT_LIST_DIR}/base/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/base/CMakeLists.txt)
@ -515,6 +519,7 @@ target_include_directories(prec PUBLIC
$<INSTALL_INTERFACE:modules>) $<INSTALL_INTERFACE:modules>)
target_link_libraries(prec PUBLIC base) target_link_libraries(prec PUBLIC base)
<<<<<<< HEAD
include(${CMAKE_CURRENT_LIST_DIR}/krylov/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/krylov/CMakeLists.txt)
add_library(krylov ${krylov_source_files}) add_library(krylov ${krylov_source_files})
set_target_properties(krylov set_target_properties(krylov
@ -528,6 +533,21 @@ target_include_directories(krylov PUBLIC
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules> $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
$<INSTALL_INTERFACE:modules>) $<INSTALL_INTERFACE:modules>)
target_link_libraries(krylov PUBLIC base prec) target_link_libraries(krylov PUBLIC base prec)
=======
include(${CMAKE_CURRENT_LIST_DIR}/linsolve/CMakeLists.txt)
add_library(linsolve ${linsolve_source_files})
set_target_properties(linsolve
PROPERTIES
Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/modules"
POSITION_INDEPENDENT_CODE TRUE
OUTPUT_NAME psb_linsolve
LINKER_LANGUAGE Fortran
)
target_include_directories(linsolve PUBLIC
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
$<INSTALL_INTERFACE:modules>)
target_link_libraries(linsolve PUBLIC base prec)
>>>>>>> cmake2
include(${CMAKE_CURRENT_LIST_DIR}/ext/CMakeLists.txt) include(${CMAKE_CURRENT_LIST_DIR}/ext/CMakeLists.txt)
@ -644,7 +664,11 @@ target_include_directories(cbind PUBLIC
$<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules> # Path for building $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules> # Path for building
$<INSTALL_INTERFACE:modules> # Path for installation $<INSTALL_INTERFACE:modules> # Path for installation
) )
<<<<<<< HEAD
target_link_libraries(cbind PUBLIC base prec krylov ext util) target_link_libraries(cbind PUBLIC base prec krylov ext util)
=======
target_link_libraries(cbind PUBLIC base prec linsolve ext util)
>>>>>>> cmake2
# Custom command to copy all header files # Custom command to copy all header files
@ -706,13 +730,21 @@ if(MPI_FOUND)
endif() endif()
<<<<<<< HEAD
foreach(lib base prec krylov ext util cbind) foreach(lib base prec krylov ext util cbind)
=======
foreach(lib base prec linsolve ext util cbind)
>>>>>>> cmake2
target_link_libraries(${lib} PUBLIC ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES}) target_link_libraries(${lib} PUBLIC ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES})
endforeach() endforeach()
endif() endif()
if(OpenCoarrays_FOUND) if(OpenCoarrays_FOUND)
<<<<<<< HEAD
foreach(lib base prec krylov ext util cbind) #TODO: check if cbind goes here! foreach(lib base prec krylov ext util cbind) #TODO: check if cbind goes here!
=======
foreach(lib base prec linsolve ext util cbind) #TODO: check if cbind goes here!
>>>>>>> cmake2
target_link_libraries(${lib} PUBLIC OpenCoarrays::caf_mpi_static) target_link_libraries(${lib} PUBLIC OpenCoarrays::caf_mpi_static)
endforeach() endforeach()
endif() endif()
@ -731,7 +763,11 @@ install(DIRECTORY "${CMAKE_BINARY_DIR}/include" DESTINATION "include"
install(DIRECTORY "${CMAKE_BINARY_DIR}/modules" DESTINATION "modules" install(DIRECTORY "${CMAKE_BINARY_DIR}/modules" DESTINATION "modules"
FILES_MATCHING PATTERN "*.mod") FILES_MATCHING PATTERN "*.mod")
<<<<<<< HEAD
install(TARGETS base prec krylov ext util cbind install(TARGETS base prec krylov ext util cbind
=======
install(TARGETS base prec linsolve ext util cbind
>>>>>>> cmake2
EXPORT ${CMAKE_PROJECT_NAME}-targets EXPORT ${CMAKE_PROJECT_NAME}-targets
DESTINATION "${CMAKE_INSTALL_LIBDIR}" DESTINATION "${CMAKE_INSTALL_LIBDIR}"
LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}" LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"

@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@
CBINDLIBNAME=libpsb_cbind.a CBINDLIBNAME=libpsb_cbind.a
OACCD=@OACCD@
OACCLD=@OACCLD@
FCOPENACC=@FCOPENACC@
CCOPENACC=@CCOPENACC@
CXXOPENACC=@CXXOPENACC@
CUDAD=@CUDAD@ CUDAD=@CUDAD@
CUDALD=@CUDALD@ CUDALD=@CUDALD@
LCUDA=@LCUDA@ LCUDA=@LCUDA@
@ -74,14 +80,18 @@ LCUDA=@LCUDA@
SPGPU_LIBS=@SPGPU_LIBS@ SPGPU_LIBS=@SPGPU_LIBS@
CUDA_DIR=@CUDA_DIR@ CUDA_DIR=@CUDA_DIR@
CUDA_DEFINES=@CUDA_DEFINES@
CUDA_INCLUDES=@CUDA_INCLUDES@ CUDA_INCLUDES=@CUDA_INCLUDES@
CUDA_LIBS=@CUDA_LIBS@ CUDA_LIBS=@CUDA_LIBS@
CUDA_VERSION=@CUDA_VERSION@ CUDA_VERSION=@CUDA_VERSION@
CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@ CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
CUDA_DEFINES=@CUDA_DEFINES@
FCUDEFINES=@FCUDEFINES@
CCUDEFINES=@CCUDEFINES@
CXXCUDEFINES=@CXXCUDEFINES@
NVCC=@CUDA_NVCC@ NVCC=@CUDA_NVCC@
CUDEFINES=@CUDEFINES@ CUDEFINES=@CUDEFINES@
.SUFFIXES: .cu .SUFFIXES: .cu
.cu.o: .cu.o:
$(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $< $(NVCC) $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<

@ -1,6 +1,6 @@
include Make.inc include Make.inc
all: dirs based precd kryld utild cbindd extd $(CUDAD) libd all: dirs based precd linslvd utild cbindd extd $(CUDAD) $(OACCD) libd
@echo "=====================================" @echo "====================================="
@echo "PSBLAS libraries Compilation Successful." @echo "PSBLAS libraries Compilation Successful."
@ -11,28 +11,31 @@ dirs:
precd: based precd: based
utild: based utild: based
kryld: precd linslvd: precd
extd: based extd: based
cudad: extd cudad: extd
cbindd: based precd kryld utild oaccd: extd
cbindd: based precd linslvd utild
libd: based precd kryld utild cbindd extd $(CUDALD) libd: based precd linslvd utild cbindd extd $(CUDALD) $(OACCLD)
$(MAKE) -C base lib $(MAKE) -C base lib
$(MAKE) -C prec lib $(MAKE) -C prec lib
$(MAKE) -C krylov lib $(MAKE) -C linsolve lib
$(MAKE) -C util lib $(MAKE) -C util lib
$(MAKE) -C cbind lib $(MAKE) -C cbind lib
$(MAKE) -C ext lib $(MAKE) -C ext lib
cudald: cudad cudald: cudad
$(MAKE) -C cuda lib $(MAKE) -C cuda lib
oaccld: oaccd
$(MAKE) -C openacc lib
based: based:
$(MAKE) -C base objs $(MAKE) -C base objs
precd: precd:
$(MAKE) -C prec objs $(MAKE) -C prec objs
kryld: linslvd:
$(MAKE) -C krylov objs $(MAKE) -C linsolve objs
utild: utild:
$(MAKE) -C util objs $(MAKE) -C util objs
cbindd: cbindd:
@ -41,6 +44,8 @@ extd: based
$(MAKE) -C ext objs $(MAKE) -C ext objs
cudad: based extd cudad: based extd
$(MAKE) -C cuda objs $(MAKE) -C cuda objs
oaccd: based extd
$(MAKE) -C openacc objs
install: all install: all
@ -62,11 +67,12 @@ install: all
clean: clean:
$(MAKE) -C base clean $(MAKE) -C base clean
$(MAKE) -C prec clean $(MAKE) -C prec clean
$(MAKE) -C krylov clean $(MAKE) -C linsolve clean
$(MAKE) -C util clean $(MAKE) -C util clean
$(MAKE) -C cbind clean $(MAKE) -C cbind clean
$(MAKE) -C ext clean $(MAKE) -C ext clean
$(MAKE) -C cuda clean $(MAKE) -C cuda clean
$(MAKE) -C openacc clean
check: all check: all
make check -C test/serial make check -C test/serial
@ -79,11 +85,12 @@ cleanlib:
veryclean: cleanlib veryclean: cleanlib
cd base && $(MAKE) veryclean cd base && $(MAKE) veryclean
cd prec && $(MAKE) veryclean cd prec && $(MAKE) veryclean
cd krylov && $(MAKE) veryclean cd linsolve && $(MAKE) veryclean
cd util && $(MAKE) veryclean cd util && $(MAKE) veryclean
cd cbind && $(MAKE) veryclean cd cbind && $(MAKE) veryclean
cd ext && $(MAKE) veryclean cd ext && $(MAKE) veryclean
cd cuda && $(MAKE) veryclean cd cuda && $(MAKE) veryclean
cd openacc && $(MAKE) veryclean
cd test/fileread && $(MAKE) clean cd test/fileread && $(MAKE) clean
cd test/pargen && $(MAKE) clean cd test/pargen && $(MAKE) clean
cd test/util && $(MAKE) clean cd test/util && $(MAKE) clean

@ -1,37 +1,42 @@
PSBLAS library, version 3.9 # PSBLAS library, version 3.9
===========================
The architecture of the Fortran 2003 sparse BLAS is described in:
>S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix
>Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No.
4, 2012.
The ideas are explored further with the paper: The PSBLAS library, developed with the aim to facilitate the parallelization of computationally intensive scientific applications, is designed to address parallel implementation of iterative solvers for sparse linear systems through the distributed memory paradigm. It includes routines for multiplying sparse matrices by dense matrices, solving block diagonal systems with triangular diagonal entries, preprocessing sparse matrices, and contains additional routines for dense matrix operations. The current implementation of PSBLAS addresses a distributed memory execution model operating with message passing.
>V. Cardellini, S. Filippone and D. Rouson. Design Patterns for
>sparse-matrix computations on hybrid CPU/GPU platforms, Scientific
>Programming, 22(2014), pp.1-19.
Version 1.0 of the library is described in: The PSBLAS library version 3 is implemented in the Fortran 2008 programming language, with reuse and/or adaptation of existing Fortran 77 and Fortran 95 software, plus a handful of C routines.
>S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear
>algebra computation on sparse matrices, ACM Trans. on Math. Software,
>26(4), Dec. 2000, pp. 527-550.
## References
UTILITIES
---------
The `test/util` directory contains some utilities to convert to/from
Harwell-Boeing and MatrixMarket file formats.
The architecture, philosophy and implementation details of the library are contained in the following papers:
DOCUMENTATION - The architecture of the Fortran 2003 sparse BLAS is described in:
------------- >S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix
See docs/psblas-3.9.pdf; an HTML version of the same document is >Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No.
available in docs/html. Please consult the sample programs, especially 4, 2012.
test/pargen/psb_[sd]_pde[23]d.f90
- The ideas are explored further with the paper:
>V. Cardellini, S. Filippone and D. Rouson. Design Patterns for
>sparse-matrix computations on hybrid CPU/GPU platforms, Scientific
>Programming, 22(2014), pp.1-19.
- Version 1.0 of the library is described in:
>S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear
>algebra computation on sparse matrices, ACM Trans. on Math. Software,
>26(4), Dec. 2000, pp. 527-550.
- The software infrastructure changes required to accommodate the implementation of the
Additive-Schwarz preconditioners available in [AMG4PSBLAS](https://github.com/sfilippone/amg4psblas/) are detailed in:
> A. Buttari, P. D'Ambra, D. di Serafino, S. Filippone, Extending PSBLAS to build parallel Schwarz preconditioners, Applied Parallel Computing. State of the Art in Scientific Computing: 7th International Workshop, PARA 2004, LNCS 3732, 2006, pp. 593-602.
> A. Buttari, P. D'Ambra, D. Di Serafino, S. Filippone, 2LEV-D2P4: A package of high-performance preconditioners for scientific and engineering applications, Applicable Algebra in Engineering, Communications and Computing, 2007, 18(3), pp. 223-239.
> P. D'Ambra, D. Di Serafino, S. Filippone, MLD2P4: A package of parallel algebraic multilevel domain decomposition preconditioners in Fortran 95, ACM Transactions on Mathematical Software, 2010, 37(3), 30.
PSBLAS is the backbone of the Parallel Sparse Computation Toolkit ([PSCToolkit](https://psctoolkit.github.io/)) suite of libraries. See the paper:
> D'Ambra, P., Durastante, F., & Filippone, S. (2023). Parallel Sparse Computation Toolkit. Software Impacts, 15, 100463.
### Other Software credits
OTHER SOFTWARE CREDITS
----------------------
We originally included a modified implementation of some of the Sparker We originally included a modified implementation of some of the Sparker
(serial sparse BLAS) material; this has been completely rewritten, way (serial sparse BLAS) material; this has been completely rewritten, way
beyond the intention(s) and responsibilities of the original developers. beyond the intention(s) and responsibilities of the original developers.
@ -40,18 +45,8 @@ The main reference for the serial sparse BLAS is:
>linear algebra subprograms for sparse matrices: a user level interface, >linear algebra subprograms for sparse matrices: a user level interface,
>ACM Trans. Math. Softw., 23(3), 379-401, 1997. >ACM Trans. Math. Softw., 23(3), 379-401, 1997.
CUDA and GPU support ## Installing
--------------------
This version of PSBLAS incorporates into a single package three
entities that were previously separated:
1. PSBLAS -- the base library
2. PSBLAS-EXT -- a library providing additional storage formats
3. SPGPU -- a package of kernels for NVIDIA GPUs originally
written by Davide Barbieri and Salvatore Filippone;
see the license file cuda/License-spgpu.md
INSTALLING
----------
To compile and run our software you will need the following To compile and run our software you will need the following
prerequisites (see also SERIAL below): prerequisites (see also SERIAL below):
@ -71,7 +66,8 @@ prerequisites (see also SERIAL below):
We use the C interface to AMD. We use the C interface to AMD.
5. If you have CUDA available, use 5. If you have CUDA available, use
--with-cuda=<path> to specify the CUDA toolkit location --enable-cuda to compile CUDA-enabled methods
--with-cudadir=<path> to specify the CUDA toolkit location
--with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute --with-cudacc=XX,YY,ZZ to specify a list of target CCs (compute
capabilities) to compile the CUDA code for. capabilities) to compile the CUDA code for.
@ -79,21 +75,27 @@ The configure script will generate a Make.inc file suitable for building
the library. The script is capable of recognizing the needed libraries the library. The script is capable of recognizing the needed libraries
with their default names; if they are in unusual places consider adding with their default names; if they are in unusual places consider adding
the paths with `--with-libs`, or explicitly specifying the names in the paths with `--with-libs`, or explicitly specifying the names in
`--with-blas`, etc. Please note that a common way for the configure script `--with-blas`, etc.
to fail is to specify inconsistent MPI vs. plain compilers, either
directly or indirectly via environment variables; e.g. specifying the >[!CAUTION]
Intel compiler with `FC=ifort` while at the same time having an > Please note that a common way for the configure script
`MPIFC=mpif90` which points to GNU Fortran. The best way to avoid this > to fail is to specify inconsistent MPI vs. plain compilers, either
situation is (in our opinion) to use the environment modules package > directly or indirectly via environment variables; e.g. specifying the
(see http://modules.sourceforge.net/), and load the relevant > Intel compiler with `FC=ifort` while at the same time having an
variables with (e.g.) > `MPIFC=mpif90` which points to GNU Fortran.
```
module load gnu46 openmpi >[!TIP]
``` > The best way to avoid this
This will delegate to the modules setup to make sure that the version of > situation is (in our opinion) to use the environment modules package
openmpi in use is the one compiled with the gnu46 compilers. After the > (see [http://modules.sourceforge.net/](http://modules.sourceforge.net/)), and load the relevant
configure script has completed you can always tweak the Make.inc file > variables with (e.g.)
yourself. > ```
> module load gcc/13.2.0 openmpi/4.1.6
> ```
> This will delegate to the modules setup to make sure that the version of
> openmpi in use is the one compiled with the selected GCC compilers. After the
> configure script has completed you can always tweak the Make.inc file
> yourself.
After you have Make.inc fixed, run After you have Make.inc fixed, run
``` ```
@ -105,58 +107,117 @@ install and the libraries will be installed under `/path/lib`, while the
module files will be installed under `/path/modules`. The regular and module files will be installed under `/path/modules`. The regular and
experimental C interface header files are under `/path/include`. experimental C interface header files are under `/path/include`.
SERIAL ### CUDA and GPU support
------
This version of PSBLAS incorporates into a single package three
entities that were previously separated:
| Library | |
|---------|--------------------|
| PSBLAS | the base library |
| PSBLAS-EXT | a library providing additional storage formats for matrices and vectors |
| SPGPU | a package of kernels for NVIDIA GPUs originally written by Davide Barbieri and Salvatore Filippone; see the license file [cuda/License-spgpu.md](cuda/License-spgpu.md) |
### OpenACC
There is a highly experimental version of an OpenACC interface,
you can access it by specifying
```bash
--enable-openacc --with-extraopenacc="-foffload=nvptx-none=-march=sm_70"
```
where the argument to the extraopenacc option depends on the compiler
you are using (the example shown here is relevant for the GNU
compiler).
### Serial
Configuring with `--enable-serial` will provide a fake MPI stub library Configuring with `--enable-serial` will provide a fake MPI stub library
that enables running in pure serial mode; no MPI installation is needed that enables running in pure serial mode; no MPI installation is needed
in this case (but note that the fake MPI stubs are only guaranteed to in this case (but note that the fake MPI stubs are only guaranteed to
cover what we use internally, it's not a complete replacement). cover what we use internally, it's not a complete replacement).
INTEGER SIZES ### Integers
-------------
We have two kinds of integers: IPK for local indices, and LPK for We have two kinds of integers: IPK for local indices, and LPK for
global indices. They can be specified independently at configure time, global indices. They can be specified independently at configure time,
e.g. e.g.
```bash
--with-ipk=4 --with-lpk=8 --with-ipk=4 --with-lpk=8
```
which is asking for 4-bytes local indices, and 8-bytes global indices which is asking for 4-bytes local indices, and 8-bytes global indices
(this is the default). (this is the default).
TODO ## Documentation
----
Fix all remaining bugs. Bugs? We don't have any ! ;-)
Further information on installation and configuration can be found in the documentation.
See [docs/psblas-3.9.pdf](docs/psblas-3.9.pdf); an HTML version of the same document is
available in docs/html. Please consult the sample programs, especially
- [test/pargen/psb_s_pde2d.F90](test/pargen/psb_s_pde2d.F90) [test/pargen/psb_d_pde2d.F90](test/pargen/psb_d_pde2d.F90)
- [test/pargen/psb_s_pde3d.F90](test/pargen/psb_s_pde3d.F90) [test/pargen/psb_d_pde3d.F90](test/pargen/psb_d_pde3d.F90)
which contain examples for the solution of linear systems obtained by the discretization of a generic second-order differential equation in two:
```math
- a_1 \frac{\partial^2 u}{\partial x^2}
- a_2 \frac{\partial^2 u}{\partial y^2}
+ b_1 \frac{\partial u}{\partial x}
+ b_2 \frac{\partial u}{\partial y}
+ c u = f
```
or three
```math
- a_1 \frac{\partial^2 u}{\partial x^2}
- a_2 \frac{\partial^2 u}{\partial y^2}
- a_3 \frac{\partial^2 u}{\partial z^2}
+ b_1 \frac{\partial u}{\partial x}
+ b_2 \frac{\partial u}{\partial y}
+ b_3 \frac{\partial u}{\partial z}
+ c u = f
```
dimensions on the unit square/cube with Dirichlet boundary conditions.
The PSBLAS team. ### Utilities
---------------
Project lead: The [test/util](test/util) directory contains some utilities to convert to/from
Salvatore Filippone Harwell-Boeing and MatrixMarket file formats.
Contributors (roughly reverse chronological order): ## TODO and bugs
Dimitri Walther - [ ] Improving OpenACC support
Andea Di Iorio - [ ] Improving OpenMP support
Stefano Petrilli - [X] Fix all remaining bugs. Bugs? We don't have any ! 🤓
Soren Rasmussen
Zaak Beekman
Ambra Abdullahi Hassan
Pasqua D'Ambra
Alfredo Buttari
Daniela di Serafino
Michele Martone
Michele Colajanni
Fabio Cerioni
Stefano Maiolatesi
Dario Pascucci
> [!NOTE]
> To report bugs 🐛 or issues ❓ please use the [GitHub issue system](https://github.com/sfilippone/psblas3/issues).
RELATED SOFTWARE
---------------- ## The PSBLAS team.
**Project lead:**
Salvatore Filippone
**Contributors** (_roughly reverse chronological order_):
- Theophane Loloum
- Fabio Durastante
- Dimitri Walther
- Andea Di Iorio
- Stefano Petrilli
- Soren Rasmussen
- Zaak Beekman
- Ambra Abdullahi Hassan
- Pasqua D'Ambra
- Alfredo Buttari
- Daniela di Serafino
- Michele Martone
- Michele Colajanni
- Fabio Cerioni
- Stefano Maiolatesi
- Dario Pascucci
## RELATED SOFTWARE
If you are looking for more sophisticated preconditioners, you may be If you are looking for more sophisticated preconditioners, you may be
interested in the package AMG4PSBLAS from interested in the package AMG4PSBLAS from
<http://github.com/sfilippone/amg4psblas> <http://github.com/sfilippone/amg4psblas> and the whole [PSCToolkit suite](https://psctoolkit.github.io/).
Contact: <https://github.com/sfilippone/psblas3> Contact: <https://github.com/sfilippone/psblas3>

@ -10,13 +10,14 @@ BASIC_MODS= psb_const_mod.o psb_cbind_const_mod.o psb_error_mod.o psb_realloc_mo
auxil/psb_z_realloc_mod.o auxil/psb_z_realloc_mod.o
COMMINT= penv/psi_penv_mod.o \ COMMINT= penv/psi_penv_mod.o \
penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o \ penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_i2_p2p_mod.o \
penv/psi_e_p2p_mod.o \ penv/psi_e_p2p_mod.o \
penv/psi_s_p2p_mod.o \ penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o \ penv/psi_d_p2p_mod.o \
penv/psi_c_p2p_mod.o \ penv/psi_c_p2p_mod.o \
penv/psi_z_p2p_mod.o \ penv/psi_z_p2p_mod.o \
penv/psi_collective_mod.o \ penv/psi_collective_mod.o \
penv/psi_i2_collective_mod.o \
penv/psi_e_collective_mod.o \ penv/psi_e_collective_mod.o \
penv/psi_m_collective_mod.o \ penv/psi_m_collective_mod.o \
penv/psi_s_collective_mod.o \ penv/psi_s_collective_mod.o \
@ -149,23 +150,25 @@ psb_realloc_mod.o: auxil/psb_m_realloc_mod.o \
auxil/psb_c_realloc_mod.o \ auxil/psb_c_realloc_mod.o \
auxil/psb_z_realloc_mod.o auxil/psb_z_realloc_mod.o
penv/psi_p2p_mod.o: penv/psi_m_p2p_mod.o \ penv/psi_p2p_mod.o: penv/psi_i2_p2p_mod.o \
penv/psi_e_p2p_mod.o \ penv/psi_m_p2p_mod.o \
penv/psi_s_p2p_mod.o \ penv/psi_e_p2p_mod.o \
penv/psi_d_p2p_mod.o \ penv/psi_s_p2p_mod.o \
penv/psi_c_p2p_mod.o \ penv/psi_d_p2p_mod.o \
penv/psi_z_p2p_mod.o penv/psi_c_p2p_mod.o \
penv/psi_collective_mod.o: penv/psi_e_collective_mod.o \ penv/psi_z_p2p_mod.o
penv/psi_m_collective_mod.o \ penv/psi_collective_mod.o: penv/psi_i2_collective_mod.o \
penv/psi_s_collective_mod.o \ penv/psi_e_collective_mod.o \
penv/psi_d_collective_mod.o \ penv/psi_m_collective_mod.o \
penv/psi_c_collective_mod.o \ penv/psi_s_collective_mod.o \
penv/psi_z_collective_mod.o penv/psi_d_collective_mod.o \
penv/psi_c_collective_mod.o \
penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \ penv/psi_z_collective_mod.o
penv/psi_i2_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o: penv/psi_penv_mod.o penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o: penv/psi_penv_mod.o
penv/psi_e_collective_mod.o penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \ penv/psi_i2_collective_mod.o penv/psi_e_collective_mod.o penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
penv/psi_d_collective_mod.o penv/psi_c_collective_mod.o penv/psi_z_collective_mod.o: penv/psi_penv_mod.o \ penv/psi_d_collective_mod.o penv/psi_c_collective_mod.o penv/psi_z_collective_mod.o: penv/psi_penv_mod.o \
penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \ penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o penv/psi_s_p2p_mod.o \
penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o penv/psi_z_p2p_mod.o

@ -35,7 +35,7 @@
! Auxiliary module for descriptor: constant values. ! Auxiliary module for descriptor: constant values.
! !
module psb_desc_const_mod module psb_desc_const_mod
use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_ use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_
! !
! Communication, prolongation & restriction ! Communication, prolongation & restriction
! !

@ -167,6 +167,7 @@ module psi_penv_mod
#else #else
integer(psb_mpk_), save :: mpi_i2amx_op, mpi_i2amn_op
integer(psb_mpk_), save :: mpi_iamx_op, mpi_iamn_op integer(psb_mpk_), save :: mpi_iamx_op, mpi_iamn_op
integer(psb_mpk_), save :: mpi_mamx_op, mpi_mamn_op integer(psb_mpk_), save :: mpi_mamx_op, mpi_mamn_op
integer(psb_mpk_), save :: mpi_eamx_op, mpi_eamn_op integer(psb_mpk_), save :: mpi_eamx_op, mpi_eamn_op
@ -181,6 +182,7 @@ module psi_penv_mod
#endif #endif
private :: psi_get_sizes, psi_register_mpi_extras private :: psi_get_sizes, psi_register_mpi_extras
private :: psi_i2amx_op, psi_i2amn_op
private :: psi_iamx_op, psi_iamn_op private :: psi_iamx_op, psi_iamn_op
private :: psi_mamx_op, psi_mamn_op private :: psi_mamx_op, psi_mamn_op
private :: psi_eamx_op, psi_eamn_op private :: psi_eamx_op, psi_eamn_op
@ -773,6 +775,8 @@ contains
#if defined(SERIAL_MPI) #if defined(SERIAL_MPI)
#else #else
if (info == 0) call mpi_op_create(psi_i2amx_op,.true.,mpi_i2amx_op,info)
if (info == 0) call mpi_op_create(psi_i2amn_op,.true.,mpi_i2amn_op,info)
if (info == 0) call mpi_op_create(psi_mamx_op,.true.,mpi_mamx_op,info) if (info == 0) call mpi_op_create(psi_mamx_op,.true.,mpi_mamx_op,info)
if (info == 0) call mpi_op_create(psi_mamn_op,.true.,mpi_mamn_op,info) if (info == 0) call mpi_op_create(psi_mamn_op,.true.,mpi_mamn_op,info)
if (info == 0) call mpi_op_create(psi_eamx_op,.true.,mpi_eamx_op,info) if (info == 0) call mpi_op_create(psi_eamx_op,.true.,mpi_eamx_op,info)
@ -997,6 +1001,8 @@ contains
& call mpi_comm_Free(ctxt%ctxt,info) & call mpi_comm_Free(ctxt%ctxt,info)
end if end if
if (close_) then if (close_) then
if (info == 0) call mpi_op_free(mpi_i2amx_op,info)
if (info == 0) call mpi_op_free(mpi_i2amn_op,info)
if (info == 0) call mpi_op_free(mpi_mamx_op,info) if (info == 0) call mpi_op_free(mpi_mamx_op,info)
if (info == 0) call mpi_op_free(mpi_mamn_op,info) if (info == 0) call mpi_op_free(mpi_mamn_op,info)
if (info == 0) call mpi_op_free(mpi_eamx_op,info) if (info == 0) call mpi_op_free(mpi_eamx_op,info)
@ -1188,6 +1194,26 @@ contains
! Note: len & type are always default integer. ! Note: len & type are always default integer.
! !
! !!!!!!!!!!!!!!!!!!!!!! ! !!!!!!!!!!!!!!!!!!!!!!
subroutine psi_i2amx_op(inv, outv,len,type)
  !
  ! Element-wise "maximum absolute value" combiner for psb_i2pk_ integers.
  ! It is handed to mpi_op_create (flagged commutative) to build
  ! mpi_i2amx_op.
  !
  !   inv   : incoming contribution
  !   outv  : running result, updated in place; on a tie in absolute
  !           value the entry already in outv is kept
  !   len   : number of entries in inv/outv
  !   type  : datatype handle passed by the caller; not used here
  !
  integer(psb_mpk_) :: len,type
  integer(psb_i2pk_) :: inv(len), outv(len)
  integer(psb_mpk_) :: k

  do k = 1, len
    if (abs(outv(k)) < abs(inv(k))) then
      outv(k) = inv(k)
    end if
  end do
end subroutine psi_i2amx_op
subroutine psi_i2amn_op(inv, outv,len,type)
  !
  ! Element-wise "minimum absolute value" combiner for psb_i2pk_ integers.
  ! It is handed to mpi_op_create (flagged commutative) to build
  ! mpi_i2amn_op.
  !
  !   inv   : incoming contribution
  !   outv  : running result, updated in place; on a tie in absolute
  !           value the entry already in outv is kept
  !   len   : number of entries in inv/outv
  !   type  : datatype handle passed by the caller; not used here
  !
  integer(psb_mpk_) :: len,type
  integer(psb_i2pk_) :: inv(len), outv(len)
  integer(psb_mpk_) :: k

  do k = 1, len
    if (abs(outv(k)) > abs(inv(k))) then
      outv(k) = inv(k)
    end if
  end do
end subroutine psi_i2amn_op
subroutine psi_mamx_op(inv, outv,len,type) subroutine psi_mamx_op(inv, outv,len,type)
integer(psb_mpk_) :: inv(len), outv(len) integer(psb_mpk_) :: inv(len), outv(len)
integer(psb_mpk_) :: len,type integer(psb_mpk_) :: len,type

@ -595,12 +595,13 @@ subroutine psb_c_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_ipk_) :: i,j,k, nzin integer(psb_ipk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= czero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine psb_lc_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_lpk_) :: i,j,k, nzin integer(psb_lpk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= czero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_c_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= czero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1
@ -4313,7 +4314,7 @@ subroutine psb_lc_csc_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nc integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:) integer(psb_lpk_), allocatable :: ilcp(:)
info = 0 info = 0
call a%sync() call a%sync()
nc = a%get_ncols() nc = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine psb_lc_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= czero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_c_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= czero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1
@ -6552,7 +6553,7 @@ subroutine psb_lc_csr_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nr integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:) integer(psb_lpk_), allocatable :: ilrp(:)
info = 0 info = 0
call a%sync() call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine psb_lc_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= czero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= czero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1

@ -595,12 +595,13 @@ subroutine psb_d_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_ipk_) :: i,j,k, nzin integer(psb_ipk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= dzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine psb_ld_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_lpk_) :: i,j,k, nzin integer(psb_lpk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= dzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_d_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= dzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1
@ -4313,7 +4314,7 @@ subroutine psb_ld_csc_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nc integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:) integer(psb_lpk_), allocatable :: ilcp(:)
info = 0 info = 0
call a%sync() call a%sync()
nc = a%get_ncols() nc = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine psb_ld_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= dzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_d_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= dzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1
@ -6552,7 +6553,7 @@ subroutine psb_ld_csr_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nr integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:) integer(psb_lpk_), allocatable :: ilrp(:)
info = 0 info = 0
call a%sync() call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine psb_ld_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= dzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1

@ -595,12 +595,13 @@ subroutine psb_s_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_ipk_) :: i,j,k, nzin integer(psb_ipk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= szero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine psb_ls_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_lpk_) :: i,j,k, nzin integer(psb_lpk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= szero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_s_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= szero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1
@ -4313,7 +4314,7 @@ subroutine psb_ls_csc_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nc integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:) integer(psb_lpk_), allocatable :: ilcp(:)
info = 0 info = 0
call a%sync() call a%sync()
nc = a%get_ncols() nc = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine psb_ls_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= szero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_s_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= szero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1
@ -6552,7 +6553,7 @@ subroutine psb_ls_csr_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nr integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:) integer(psb_lpk_), allocatable :: ilrp(:)
info = 0 info = 0
call a%sync() call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine psb_ls_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= szero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= szero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1

@ -595,12 +595,13 @@ subroutine psb_z_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_ipk_) :: i,j,k, nzin integer(psb_ipk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= zzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine psb_lz_coo_clean_zeros(a, info)
integer(psb_ipk_), intent(out) :: info integer(psb_ipk_), intent(out) :: info
! !
integer(psb_lpk_) :: i,j,k, nzin integer(psb_lpk_) :: i,j,k, nzin
info = 0 info = 0
nzin = a%get_nzeros() nzin = a%get_nzeros()
j = 0 j = 0
do i=1, nzin do i=1, nzin
if (a%val(i) /= zzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
j = j + 1 j = j + 1
a%val(j) = a%val(i) a%val(j) = a%val(i)
a%ia(j) = a%ia(i) a%ia(j) = a%ia(i)

@ -2421,7 +2421,8 @@ subroutine psb_z_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= zzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1
@ -4313,7 +4314,7 @@ subroutine psb_lz_csc_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nc integer(psb_lpk_) :: i, j, k, nc
integer(psb_lpk_), allocatable :: ilcp(:) integer(psb_lpk_), allocatable :: ilcp(:)
info = 0 info = 0
call a%sync() call a%sync()
nc = a%get_ncols() nc = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine psb_lz_csc_clean_zeros(a, info)
j = a%icp(1) j = a%icp(1)
do i=1, nc do i=1, nc
do k = ilcp(i), ilcp(i+1) -1 do k = ilcp(i), ilcp(i+1) -1
if (a%val(k) /= zzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ia(j) = a%ia(k) a%ia(j) = a%ia(k)
j = j + 1 j = j + 1

@ -3642,7 +3642,8 @@ subroutine psb_z_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= zzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1
@ -6552,7 +6553,7 @@ subroutine psb_lz_csr_clean_zeros(a, info)
! !
integer(psb_lpk_) :: i, j, k, nr integer(psb_lpk_) :: i, j, k, nr
integer(psb_lpk_), allocatable :: ilrp(:) integer(psb_lpk_), allocatable :: ilrp(:)
info = 0 info = 0
call a%sync() call a%sync()
nr = a%get_nrows() nr = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine psb_lz_csr_clean_zeros(a, info)
j = a%irp(1) j = a%irp(1)
do i=1, nr do i=1, nr
do k = ilrp(i), ilrp(i+1) -1 do k = ilrp(i), ilrp(i+1) -1
if (a%val(k) /= zzero) then ! Always keep the diagonal, even if numerically zero
if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
a%val(j) = a%val(k) a%val(j) = a%val(k)
a%ja(j) = a%ja(k) a%ja(j) = a%ja(k)
j = j + 1 j = j + 1

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE. ! POSSIBILITY OF SUCH DAMAGE.
! !
! !
! File: psb_scsprt.f90 ! File: psb_geprt.f90
! Subroutine: ! Subroutine:
! Arguments: ! Arguments:
@ -131,6 +131,7 @@ subroutine psb_cgeprt2(iout,a,head)
ncol = size(a,2) ncol = size(a,2)
write(iout,*) nrow,ncol write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
do i=1,nrow do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_cgeprt1(iout,a,head)
write(iout,'(a)') '% ' write(iout,'(a)') '% '
nrow = size(a,1) nrow = size(a,1)
ncol = 1 ncol = 1
write(iout,*) nrow write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE. ! POSSIBILITY OF SUCH DAMAGE.
! !
! !
! File: psb_dcsprt.f90 ! File: psb_geprt.f90
! Subroutine: ! Subroutine:
! Arguments: ! Arguments:
@ -124,13 +124,14 @@ subroutine psb_dgeprt2(iout,a,head)
character(len=80) :: frmtv character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general' write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% ' write(iout,'(a)') '% '
nrow = size(a,1) nrow = size(a,1)
ncol = size(a,2) ncol = size(a,2)
write(iout,*) nrow,ncol write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
do i=1,nrow do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_dgeprt1(iout,a,head)
character(len=80) :: frmtv character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general' write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% ' write(iout,'(a)') '% '
nrow = size(a,1) nrow = size(a,1)
ncol = 1 ncol = 1
write(iout,*) nrow write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE. ! POSSIBILITY OF SUCH DAMAGE.
! !
! !
! File: psb_scsprt.f90 ! File: psb_geprt.f90
! Subroutine: ! Subroutine:
! Arguments: ! Arguments:
@ -124,13 +124,14 @@ subroutine psb_sgeprt2(iout,a,head)
character(len=80) :: frmtv character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general' write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% ' write(iout,'(a)') '% '
nrow = size(a,1) nrow = size(a,1)
ncol = size(a,2) ncol = size(a,2)
write(iout,*) nrow,ncol write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
do i=1,nrow do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_sgeprt1(iout,a,head)
character(len=80) :: frmtv character(len=80) :: frmtv
integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
write(iout,'(a)') '%%MatrixMarket matrix array real general' write(iout,'(a)') '%%MatrixMarket matrix array complex general'
write(iout,'(a)') '% '//trim(head) write(iout,'(a)') '% '//trim(head)
write(iout,'(a)') '% ' write(iout,'(a)') '% '
nrow = size(a,1) nrow = size(a,1)
ncol = 1 ncol = 1
write(iout,*) nrow write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

@ -29,7 +29,7 @@
! POSSIBILITY OF SUCH DAMAGE. ! POSSIBILITY OF SUCH DAMAGE.
! !
! !
! File: psb_scsprt.f90 ! File: psb_geprt.f90
! Subroutine: ! Subroutine:
! Arguments: ! Arguments:
@ -131,6 +131,7 @@ subroutine psb_zgeprt2(iout,a,head)
ncol = size(a,2) ncol = size(a,2)
write(iout,*) nrow,ncol write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
do i=1,nrow do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_zgeprt1(iout,a,head)
write(iout,'(a)') '% ' write(iout,'(a)') '% '
nrow = size(a,1) nrow = size(a,1)
ncol = 1 ncol = 1
write(iout,*) nrow write(iout,*) nrow,ncol
write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))' write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res) & ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res) & ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res) & ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod

@ -8,7 +8,7 @@ contains
& ah,ph,bh,xh,cdh,options) bind(c) result(res) & ah,ph,bh,xh,cdh,options) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
& ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res) & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
use psb_base_mod use psb_base_mod
use psb_prec_mod use psb_prec_mod
use psb_krylov_mod use psb_linsolve_mod
use psb_objhandle_mod use psb_objhandle_mod
use psb_prec_cbind_mod use psb_prec_cbind_mod
use psb_base_string_cbind_mod use psb_base_string_cbind_mod

@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR)
CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR) CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR)
PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind
PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR) PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR)
# #
# Compilers and such # Compilers and such

@ -0,0 +1,104 @@
# AX_C_OPENACC
# ------------
# Check which options need to be passed to the C compiler to support OpenACC.
# Set the OPENACC_CFLAGS variable to these options.
# The options are necessary at compile time (so the #pragmas are understood)
# and at link time (so the appropriate library is linked with).
# This macro takes care to not produce redundant options if $CC $CFLAGS already
# supports Openacc.
#
# For each candidate option, we do a compile test first, then a link test;
# if the compile test succeeds but the link test fails, that means we have
# found the correct option but it doesn't work because the libraries are
# broken. (This can happen, for instance, with SunPRO C and a bad combination
# of operating system patches.)
#
# Several of the options in our candidate list can be misinterpreted by
# compilers that don't use them to activate Openacc support; for example,
# many compilers understand "-openacc" to mean "write output to a file
# named 'penmp'" rather than "enable Openacc". We can't completely avoid
# the possibility of clobbering files named 'penmp' or 'mp' in configure's
# working directory; therefore, this macro will bomb out if any such file
# already exists when it's invoked.
AC_DEFUN([AX_C_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  dnl Purpose: find the C compiler option that turns on OpenACC and store it
  dnl in OPENACC_CFLAGS. The variable stays empty when OpenACC is disabled,
  dnl unsupported, or needs no extra option. The probe result is cached in
  dnl ax_cv_prog_c_openacc.
  dnl
  dnl The reset below spells the variable name out: a language-prefix macro
  dnl placed here would be expanded ahead of the switch to C further down and
  dnl would follow whatever language the caller had selected.
  OPENACC_CFLAGS=
  if test "$enable_openacc" != no; then
    AC_LANG_PUSH([C])
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate options, tried in order. The empty string covers a
      dnl compiler that already has OpenACC switched on.
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
],
          [AC_LINK_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Stop as soon as a probe compiled: either it also linked and the
        dnl option is recorded, or it failed to link and is unsupported.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl Normalise the cached value. The assignments must hit the same ax_cv
      dnl variable that is tested and reported, otherwise the placeholder
      dnl text would leak into the compiler flags.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
    AC_LANG_POP([C])
  fi
])
# _AX_OPENACC_SAFE_WD
# -------------------
# AC_REQUIREd by AX_C_OPENACC. Checks both at autoconf time and at
# configure time for the files 'penmp' and 'mp', which the OpenACC probe
# deletes with "rm -f" once it has finished; either check aborts with an
# error if one of these files is already present.
AC_DEFUN([_AX_OPENACC_SAFE_WD],
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
To use AX_OPENACC you must not have either of these files
at the top level of your source tree.]))])]dnl
[if test -e penmp || test -e mp; then
AC_MSG_ERROR(m4_normalize(
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
Aborting configure because one of these files already exists.]))
fi])

@ -0,0 +1,104 @@
# AX_CXX_OPENACC
# --------------
# Check which options need to be passed to the C++ compiler to support OpenACC.
# Set the OPENACC_CXXFLAGS variable to these options.
# The options are necessary at compile time (so the #pragmas are understood)
# and at link time (so the appropriate library is linked with).
# This macro takes care to not produce redundant options if $CC $CFLAGS already
# supports Openacc.
#
# For each candidate option, we do a compile test first, then a link test;
# if the compile test succeeds but the link test fails, that means we have
# found the correct option but it doesn't work because the libraries are
# broken. (This can happen, for instance, with SunPRO C and a bad combination
# of operating system patches.)
#
# Several of the options in our candidate list can be misinterpreted by
# compilers that don't use them to activate Openacc support; for example,
# many compilers understand "-openacc" to mean "write output to a file
# named 'penmp'" rather than "enable Openacc". We can't completely avoid
# the possibility of clobbering files named 'penmp' or 'mp' in configure's
# working directory; therefore, this macro will bomb out if any such file
# already exists when it's invoked.
AC_DEFUN([AX_CXX_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  dnl Purpose: find the C++ compiler option that turns on OpenACC and store
  dnl it in OPENACC_CXXFLAGS. The variable stays empty when OpenACC is
  dnl disabled, unsupported, or needs no extra option. The probe result is
  dnl cached in ax_cv_prog_cxx_openacc.
  dnl
  dnl The reset below spells the variable name out: a language-prefix macro
  dnl placed here would be expanded ahead of the switch to C++ further down
  dnl and would follow whatever language the caller had selected.
  OPENACC_CXXFLAGS=
  if test "$enable_openacc" != no; then
    AC_LANG_PUSH([C++])
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate options, tried in order. The empty string covers a
      dnl compiler that already has OpenACC switched on.
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
],
          [AC_LINK_IFELSE([
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Stop as soon as a probe compiled: either it also linked and the
        dnl option is recorded, or it failed to link and is unsupported.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl Normalise the cached value. The assignments must hit the same ax_cv
      dnl variable that is tested and reported, otherwise the placeholder
      dnl text would leak into the compiler flags.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
    AC_LANG_POP([C++])
  fi
])
dnl _AC_OPENACC_SAFE_WD
dnl ------------------
dnl AC_REQUIREd by AC_OPENACC. Checks both at autoconf time and at
dnl configure time for files that AC_OPENACC clobbers.
dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
dnl [AX_OPENACC clobbers files named 'mp' and 'penmp'.
dnl To use AX_OPENACC you must not have either of these files
dnl at the top level of your source tree.]))])]dnl
dnl [if test -e penmp || test -e mp; then
dnl AC_MSG_ERROR(m4_normalize(
dnl [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
dnl Aborting configure because one of these files already exists.]))
dnl fi])

@ -0,0 +1,108 @@
# AX_FC_OPENACC
# -------------
# Check which options need to be passed to the Fortran compiler to support
# OpenACC. Set the OPENACC_FCFLAGS variable to these options.
# The options are necessary at compile time (so the #pragmas are understood)
# and at link time (so the appropriate library is linked with).
# This macro takes care to not produce redundant options if $CC $CFLAGS already
# supports Openacc.
#
# For each candidate option, we do a compile test first, then a link test;
# if the compile test succeeds but the link test fails, that means we have
# found the correct option but it doesn't work because the libraries are
# broken. (This can happen, for instance, with SunPRO C and a bad combination
# of operating system patches.)
#
# Several of the options in our candidate list can be misinterpreted by
# compilers that don't use them to activate Openacc support; for example,
# many compilers understand "-openacc" to mean "write output to a file
# named 'penmp'" rather than "enable Openacc". We can't completely avoid
# the possibility of clobbering files named 'penmp' or 'mp' in configure's
# working directory; therefore, this macro will bomb out if any such file
# already exists when it's invoked.
AC_DEFUN([AX_FC_OPENACC],
[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
[AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
[
  dnl Purpose: find the Fortran compiler option that turns on OpenACC and
  dnl store it in OPENACC_FCFLAGS. The variable stays empty when OpenACC is
  dnl disabled, unsupported, or needs no extra option. The probe result is
  dnl cached in ax_cv_prog_fc_openacc.
  dnl
  dnl The reset below spells the variable name out: a language-prefix macro
  dnl placed here would be expanded ahead of the switch to Fortran further
  dnl down and would follow whatever language the caller had selected.
  OPENACC_FCFLAGS=
  if test "$enable_openacc" != no; then
    AC_LANG_PUSH([Fortran])
    AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate options, tried in order. The empty string covers a
      dnl compiler that already has OpenACC switched on.
      for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
],
          [AC_LINK_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
        dnl Leave the loop only once a probe compiled. Comparing against the
        dnl initial placeholder keeps the loop going after a failed compile,
        dnl so the remaining candidate options still get their turn.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl Normalise the cached value. The assignments must hit the same ax_cv
      dnl variable that is tested and reported, otherwise the placeholder
      dnl text would leak into the compiler flags.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
    AC_LANG_POP([Fortran])
  fi
])
# _AX_OPENACC_SAFE_WD
# -------------------
# AC_REQUIREd by AX_FC_OPENACC. Checks both at autoconf time and at
# configure time for the files 'penmp' and 'mp', which the OpenACC probe
# deletes with "rm -f" once it has finished; either check aborts with an
# error if one of these files is already present.
AC_DEFUN([_AX_OPENACC_SAFE_WD],
[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
[AX_OPENACC clobbers files named 'mp' and 'penmp'.
To use AX_OPENACC you must not have either of these files
at the top level of your source tree.]))])]dnl
[if test -e penmp || test -e mp; then
AC_MSG_ERROR(m4_normalize(
[AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
Aborting configure because one of these files already exists.]))
fi])

@ -2157,6 +2157,31 @@ CPPFLAGS="$SAVE_CPPFLAGS"
dnl @synopsis PAC_ARG_CUDA
dnl
dnl Test for --enable-cuda
dnl
dnl
dnl
dnl Example use:
dnl
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_CUDA],
[AC_MSG_CHECKING([whether we want cuda ])
dnl Sets pac_cv_cuda from the --enable-cuda / --disable-cuda switch.
dnl The action below runs for every spelling of the option, the negative
dnl forms included, so the decision is taken from enableval instead of being
dnl hard-wired to yes. When the option is absent pac_cv_cuda is left
dnl untouched, exactly as before.
dnl NOTE(review): no result message is printed here; presumably the caller
dnl completes the checking line -- confirm.
AC_ARG_ENABLE([cuda],
  [AS_HELP_STRING([--enable-cuda],
     [Specify whether to enable cuda. ])],
  [
   if test x"$enableval" = x"no"; then
     pac_cv_cuda="no";
   else
     pac_cv_cuda="yes";
   fi
  ])
]
)
dnl @synopsis PAC_CHECK_CUDA dnl @synopsis PAC_CHECK_CUDA
dnl dnl
@ -2173,7 +2198,7 @@ dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it> dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl dnl
AC_DEFUN(PAC_CHECK_CUDA, AC_DEFUN(PAC_CHECK_CUDA,
[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]), [AC_ARG_WITH(cudadir, AC_HELP_STRING([--with-cudadir=DIR], [Specify the CUDA install directory.]),
[pac_cv_cuda_dir=$withval], [pac_cv_cuda_dir=$withval],
[pac_cv_cuda_dir='']) [pac_cv_cuda_dir=''])
@ -2234,56 +2259,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo
[pac_cv_cudacc='']) [pac_cv_cudacc=''])
]) ])
AC_DEFUN([PAC_ARG_WITH_LIBRSB],
[SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS"
dnl Handles --with-librsb=DIR. When DIR/rsb.h is usable the LIBRSB_* and
dnl RSB_* variables are filled in, LRSB is set, and -DHAVE_RSB is prepended
dnl to FDEFINES and CDEFINES. LIBS and CPPFLAGS are restored before leaving.
AC_ARG_WITH([librsb],
  [AS_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
   --with-librsb=/opt/packages/librsb])],
  [pac_cv_librsb_dir=$withval],
  [pac_cv_librsb_dir=''])
if test "x$pac_cv_librsb_dir" != "x"; then
   LIBS="-L$pac_cv_librsb_dir $LIBS"
   RSB_INCLUDES="-I$pac_cv_librsb_dir"
   RSB_LIBDIR="-L$pac_cv_librsb_dir"
fi
AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
  [pac_rsb_header_ok=yes],
  [pac_rsb_header_ok=no; RSB_INCLUDES=""])
dnl A single equals sign is the portable string comparison for test.
if test "x$pac_rsb_header_ok" = "xyes" ; then
   RSB_LIBS="-lrsb $RSB_LIBDIR"
   RSBLIBNAME="librsb.a";
   LIBRSB_DIR="$pac_cv_librsb_dir";
   LIBRSB_INCDIR="$LIBRSB_DIR";
   LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
   LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
   LIBRSB_DEFINES="-DHAVE_RSB";
   LRSB=-lpsb_rsb
   FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
   CDEFINES="$LIBRSB_DEFINES $CDEFINES";
fi
LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS"
])
dnl
dnl @synopsis PAC_CHECK_CUDA_VERSION dnl @synopsis PAC_CHECK_CUDA_VERSION
dnl dnl
@ -2336,3 +2311,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
])dnl ])dnl
dnl @synopsis PAC_ARG_OPENACC
dnl
dnl Test for --enable-openacc / --disable-openacc.
dnl
dnl On exit pac_cv_openacc is "yes" when OpenACC was requested and "no"
dnl otherwise. When the option is not given at all, a value of
dnl pac_cv_openacc that was already "yes" on entry is kept.
dnl
dnl Example use:
dnl
dnl   PAC_ARG_OPENACC()
dnl   if test "x$pac_cv_openacc" = "xyes" ; then ... ; fi
dnl
dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
dnl
AC_DEFUN([PAC_ARG_OPENACC],
[AC_MSG_CHECKING([whether we want openacc ])
AC_ARG_ENABLE(openacc,
AS_HELP_STRING([--enable-openacc],
[Specify whether to enable openacc. ]),
[
# This branch also runs for --disable-openacc and --enable-openacc=no
# where enableval is "no" so the value has to be inspected.
if test "x$enableval" = "xno" ; then
  pac_cv_openacc="no";
else
  pac_cv_openacc="yes";
fi
]
)
if test x"$pac_cv_openacc" = x"yes" ; then
   AC_MSG_RESULT([yes.])
else
 pac_cv_openacc="no";
 AC_MSG_RESULT([no.])
fi
]
)

530
configure vendored

@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE am__EXEEXT_TRUE
LTLIBOBJS LTLIBOBJS
LIBOBJS LIBOBJS
LRSB
LIBRSB_DEFINES
LIBRSB_DIR
LIBRSB_INCDIR
LIBRSB_INCLUDES
LIBRSB_LIBS
LCUDA LCUDA
CUDALD CUDALD
CUDAD CUDAD
@ -662,11 +668,19 @@ CUDA_SHORT_VERSION
CUDA_VERSION CUDA_VERSION
CUDA_LIBS CUDA_LIBS
CUDA_INCLUDES CUDA_INCLUDES
CXXCUDEFINES
CCUDEFINES
FCUDEFINES
CUDA_DEFINES CUDA_DEFINES
CUDA_DIR CUDA_DIR
EXTRALDLIBS EXTRALDLIBS
SPGPU_LIBS SPGPU_LIBS
SPGPU_FLAGS SPGPU_FLAGS
CXXOPENACC
CCOPENACC
FCOPENACC
OACCLD
OACCD
METISINCFILE METISINCFILE
UTILLIBNAME UTILLIBNAME
METHDLIBNAME METHDLIBNAME
@ -838,8 +852,14 @@ with_amd
with_amddir with_amddir
with_amdincdir with_amdincdir
with_amdlibdir with_amdlibdir
with_cuda enable_cuda
with_cudadir
with_cudacc with_cudacc
enable_openacc
with_extraopenacc
with_ccopenacc
with_cxxopenacc
with_fcopenacc
' '
ac_precious_vars='build_alias ac_precious_vars='build_alias
host_alias host_alias
@ -1490,6 +1510,9 @@ Optional Features:
--disable-silent-rules verbose build output (undo: "make V=0") --disable-silent-rules verbose build output (undo: "make V=0")
--enable-openmp Specify whether to enable openmp. --enable-openmp Specify whether to enable openmp.
--disable-openmp do not use OpenMP --disable-openmp do not use OpenMP
--enable-cuda Specify whether to enable cuda.
--enable-openacc Specify whether to enable openacc.
--disable-openacc do not use Openacc
Optional Packages: Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes] --with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@ -1532,9 +1555,17 @@ Optional Packages:
--with-amddir=DIR Specify the directory for AMD library and includes. --with-amddir=DIR Specify the directory for AMD library and includes.
--with-amdincdir=DIR Specify the directory for AMD includes. --with-amdincdir=DIR Specify the directory for AMD includes.
--with-amdlibdir=DIR Specify the directory for AMD library. --with-amdlibdir=DIR Specify the directory for AMD library.
--with-cuda=DIR Specify the CUDA install directory. --with-cudadir=DIR Specify the CUDA install directory.
--with-cudacc A comma-separated list of CCs to compile to, for --with-cudacc A comma-separated list of CCs to compile to, for
example, --with-cudacc=50,60,70,75 example, --with-cudacc=50,60,70,75
--with-extraopenacc additional [EXTRAOPENACC] flags to be added: will
prepend to [EXTRAOPENACC]
--with-ccopenacc additional [CCOPENACC] flags to be added: will
prepend to [CCOPENACC]
--with-cxxopenacc additional [CXXOPENACC] flags to be added: will
prepend to [CXXOPENACC]
--with-fcopenacc additional [FCOPENACC] flags to be added: will
prepend to [FCOPENACC]
Some influential environment variables: Some influential environment variables:
FC Fortran compiler command FC Fortran compiler command
@ -5825,7 +5856,7 @@ fi
# we just gave the user the chance to append values to these variables # we just gave the user the chance to append values to these variables
###############################################################################
if test -n "$ac_tool_prefix"; then if test -n "$ac_tool_prefix"; then
@ -7392,7 +7423,7 @@ fi
############################################################################## ##############################################################################
BASEMODNAME=psb_base_mod BASEMODNAME=psb_base_mod
PRECMODNAME=psb_prec_mod PRECMODNAME=psb_prec_mod
METHDMODNAME=psb_krylov_mod METHDMODNAME=psb_linsolve_mod
UTILMODNAME=psb_util_mod UTILMODNAME=psb_util_mod
if test "X$psblas_cv_fc" == X"cray" if test "X$psblas_cv_fc" == X"cray"
@ -7402,7 +7433,7 @@ then
FIFLAG="-I" FIFLAG="-I"
BASEMODNAME=PSB_BASE_MOD BASEMODNAME=PSB_BASE_MOD
PRECMODNAME=PSB_PREC_MOD PRECMODNAME=PSB_PREC_MOD
METHDMODNAME=PSB_KRYLOV_MOD METHDMODNAME=PSB_LINSOLVE_MOD
UTILMODNAME=PSB_UTIL_MOD UTILMODNAME=PSB_UTIL_MOD
else else
@ -10602,12 +10633,26 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
fi fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want cuda " >&5
printf %s "checking whether we want cuda ... " >&6; }
# Check whether --enable-cuda was given.
if test ${enable_cuda+y}
then :
enableval=$enable_cuda;
pac_cv_cuda="yes";
fi
if test "x$pac_cv_cuda" == "xyes"; then
# Check whether --with-cuda was given. # Check whether --with-cudadir was given.
if test ${with_cuda+y} if test ${with_cudadir+y}
then : then :
withval=$with_cuda; pac_cv_cuda_dir=$withval withval=$with_cudadir; pac_cv_cuda_dir=$withval
else $as_nop else $as_nop
pac_cv_cuda_dir='' pac_cv_cuda_dir=''
fi fi
@ -10693,10 +10738,10 @@ SAVE_LIBS="$LIBS"
SAVE_CPPFLAGS="$CPPFLAGS" SAVE_CPPFLAGS="$CPPFLAGS"
if test "x$pac_cv_have_cuda" == "x"; then if test "x$pac_cv_have_cuda" == "x"; then
# Check whether --with-cuda was given. # Check whether --with-cudadir was given.
if test ${with_cuda+y} if test ${with_cudadir+y}
then : then :
withval=$with_cuda; pac_cv_cuda_dir=$withval withval=$with_cudadir; pac_cv_cuda_dir=$withval
else $as_nop else $as_nop
pac_cv_cuda_dir='' pac_cv_cuda_dir=''
fi fi
@ -10814,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
LIBS="$SAVE_LIBS" LIBS="$SAVE_LIBS"
CPPFLAGS="$SAVE_CPPFLAGS" CPPFLAGS="$SAVE_CPPFLAGS"
HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version"; CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000); CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
HAVE_CUDA="yes"; CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu"; SPGPU_LIBS="-lspgpu";
CUDAD=cudad; CUDAD=cudad;
CUDALD=cudald; CUDALD=cudald;
@ -10837,7 +10883,7 @@ fi
if test "x$pac_cv_cudacc" == "x"; then if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75"; pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc"; CUDA_CC="$pac_cv_cudacc";
fi fi
if (( $pac_cv_cuda_version >= 11070 )) if (( $pac_cv_cuda_version >= 11070 ))
@ -10849,24 +10895,432 @@ fi
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done done
if test "x$pac_cv_cuda_version" != "xunknown"; then if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" FCUDEFINES=" ${CUDA_DEFINES}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi fi
fi fi
if test "x$pac_cv_ipk_size" != "x4"; then if test "x$pac_cv_ipk_size" != "x4"; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&5
printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;} printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU" >&6;}
pac_cv_cuda="no";
HAVE_CUDA="no"; HAVE_CUDA="no";
CUDA_CC=""; CUDA_CC="";
SPGPU_LIBS=""; SPGPU_LIBS="";
CUDAD=""; CUDAD="";
CUDALD=""; CUDALD="";
CUDEFINES=""; CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES=""; CUDA_INCLUDES="";
CUDA_LIBS=""; CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi
fi
# Decide whether OpenACC support was requested; the answer is left in
# pac_cv_openacc ("yes" or "no").
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
printf %s "checking whether we want openacc ... " >&6; }
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
  enableval=$enable_openacc;
  # enable_openacc is also set (to "no") by --disable-openacc and
  # --enable-openacc=no, so the value itself must be inspected.
  if test "x$enableval" = "xno" ; then
    pac_cv_openacc="no";
  else
    pac_cv_openacc="yes";
  fi
fi

# "=" instead of "==": the latter is not accepted by every test builtin.
if test x"$pac_cv_openacc" = x"yes" ; then
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
printf "%s\n" "yes." >&6; }
else
  pac_cv_openacc="no";
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
printf "%s\n" "no." >&6; }
fi
if test x"$pac_cv_openacc" == x"yes" ; then
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-extraopenacc was given.
if test ${with_extraopenacc+y}
then :
withval=$with_extraopenacc;
EXTRAOPENACC="${withval} ${EXTRAOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5
printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
if test -e penmp || test -e mp; then
as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5
fi
# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
enableval=$enable_openacc;
fi
OPENACC_CFLAGS=
if test "$enable_openacc" != no; then
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5
printf %s "checking for $CC option to support Openacc... " >&6; }
if test ${ax_cv_prog_c_openacc+y}
then :
printf %s "(cached) " >&6
else $as_nop
ax_cv_prog_c_openacc='not found'
for ac_option in '' -fopenacc -openacc -acc; do
ac_save_CFLAGS=$CFLAGS
CFLAGS="$CFLAGS $ac_option"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
_ACEOF
if ac_fn_c_try_compile "$LINENO"
then :
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (0); return 0;}
_ACEOF
if ac_fn_c_try_link "$LINENO"
then :
ax_cv_prog_c_openacc=$ac_option
else $as_nop
ax_cv_prog_c_openacc='unsupported'
fi fi
rm -f core conftest.err conftest.$ac_objext conftest.beam \
conftest$ac_exeext conftest.$ac_ext
fi
rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
CFLAGS=$ac_save_CFLAGS
if test "$ax_cv_prog_c_openacc" != 'not found'; then
break
fi
done
if test "$ax_cv_prog_c_openacc" = 'not found'; then
ac_cv_prog_c_openacc='unsupported'
elif test "$ax_cv_prog_c_openacc" = ''; then
ac_cv_prog_c_openacc='none needed'
fi
rm -f penmp mp
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5
printf "%s\n" "$ax_cv_prog_c_openacc" >&6; }
if test "$ax_cv_prog_c_openacc" != 'unsupported' && \
test "$ax_cv_prog_c_openacc" != 'none needed'; then
OPENACC_CFLAGS="$ax_cv_prog_c_openacc"
fi
ac_ext=c
ac_cpp='$CPP $CPPFLAGS'
ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi
CCOPENACC="$ax_cv_prog_c_openacc";
# Probe the C++ compiler for the option that enables OpenACC.
#
# Each candidate option is tried with a compile and then a link of a tiny
# program that requires _OPENACC and calls acc_init. The outcome is cached
# in ax_cv_prog_cxx_openacc, which ends up holding either the working option
# or one of the status words 'unsupported' / 'none needed'.
# OPENACC_CXXFLAGS and CXXOPENACC receive the option only when it is a real
# flag; they are left empty for any status word.

# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
  enableval=$enable_openacc;
fi

# Reset the C++ result (not OPENACC_CFLAGS, which belongs to the C probe).
OPENACC_CXXFLAGS=
if test "$enable_openacc" != no; then
  ac_ext=cpp
  ac_cpp='$CXXCPP $CPPFLAGS'
  ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
  ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
  ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5
printf %s "checking for $CXX option to support Openacc... " >&6; }
  if test ${ax_cv_prog_cxx_openacc+y}
  then :
    printf %s "(cached) " >&6
  else $as_nop
    ax_cv_prog_cxx_openacc='not found'
    for ac_option in '' -fopenacc -openacc -acc; do
      ac_save_CXXFLAGS=$CXXFLAGS
      CXXFLAGS="$CXXFLAGS $ac_option"
      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
_ACEOF
      if ac_fn_cxx_try_compile "$LINENO"
      then :
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#ifndef _OPENACC
#error "OpenACC not supported"
#endif
#include <openacc.h>
int main (void) { acc_init (acc_get_device_type()); return 0;}
_ACEOF
        if ac_fn_cxx_try_link "$LINENO"
        then :
          ax_cv_prog_cxx_openacc=$ac_option
        else $as_nop
          ax_cv_prog_cxx_openacc='unsupported'
        fi
        rm -f core conftest.err conftest.$ac_objext conftest.beam \
          conftest$ac_exeext conftest.$ac_ext
      fi
      rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
      CXXFLAGS=$ac_save_CXXFLAGS
      # Stop at the first option that compiled (whether or not it linked).
      if test "$ax_cv_prog_cxx_openacc" != 'not found'; then
        break
      fi
    done
    # Normalise the cached value itself (the ax_cv_ variable that every
    # later test reads), not an unrelated ac_cv_ variable.
    if test "$ax_cv_prog_cxx_openacc" = 'not found'; then
      ax_cv_prog_cxx_openacc='unsupported'
    elif test "$ax_cv_prog_cxx_openacc" = ''; then
      ax_cv_prog_cxx_openacc='none needed'
    fi
    rm -f penmp mp
  fi
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5
printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; }
  # Only a real compiler option may become a flag; 'not found' is listed
  # too so that a value cached by an earlier run cannot leak through.
  case $ax_cv_prog_cxx_openacc in
    'not found' | unsupported | 'none needed') ;;
    *) OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc" ;;
  esac
  ac_ext=c
  ac_cpp='$CPP $CPPFLAGS'
  ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
  ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
  ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi

CXXOPENACC="$OPENACC_CXXFLAGS";
# Probe the Fortran compiler for the option that enables OpenACC.
#
# Each candidate option is tried with a compile and then a link of a tiny
# program that uses the openacc module and calls acc_init. The outcome is
# cached in ax_cv_prog_fc_openacc, which ends up holding either the working
# option or one of the status words 'unsupported' / 'none needed'.
# OPENACC_FCFLAGS and FCOPENACC receive the option only when it is a real
# flag; they are left empty for any status word.

# Check whether --enable-openacc was given.
if test ${enable_openacc+y}
then :
  enableval=$enable_openacc;
fi

# Reset the Fortran result (not OPENACC_CFLAGS, which belongs to the C probe).
OPENACC_FCFLAGS=
if test "$enable_openacc" != no; then
  ac_ext=${ac_fc_srcext-f}
  ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
  ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
  ac_compiler_gnu=$ac_cv_fc_compiler_gnu
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5
printf %s "checking for $FC option to support Openacc... " >&6; }
  if test ${ax_cv_prog_fc_openacc+y}
  then :
    printf %s "(cached) " >&6
  else $as_nop
    ax_cv_prog_fc_openacc='not found'
    for ac_option in '' -fopenacc -openacc -acc; do
      ac_save_FCFLAGS=$FCFLAGS
      FCFLAGS="$FCFLAGS $ac_option"
      # The test program is indented six columns so that it is valid in
      # both fixed-form and free-form source.
      cat > conftest.$ac_ext <<_ACEOF
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
_ACEOF
      if ac_fn_fc_try_compile "$LINENO"
      then :
        cat > conftest.$ac_ext <<_ACEOF
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
_ACEOF
        if ac_fn_fc_try_link "$LINENO"
        then :
          ax_cv_prog_fc_openacc=$ac_option
        else $as_nop
          ax_cv_prog_fc_openacc='unsupported'
        fi
        rm -f core conftest.err conftest.$ac_objext conftest.beam \
          conftest$ac_exeext conftest.$ac_ext
      fi
      rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
      FCFLAGS=$ac_save_FCFLAGS
      # Keep trying further options while nothing has compiled yet.
      # Comparing against 'unsupported' here would leave the loop after the
      # very first option, because the value is still 'not found' then.
      if test "$ax_cv_prog_fc_openacc" != 'not found'; then
        break
      fi
    done
    # Normalise the cached value itself (the ax_cv_ variable that every
    # later test reads), not an unrelated ac_cv_ variable.
    if test "$ax_cv_prog_fc_openacc" = 'not found'; then
      ax_cv_prog_fc_openacc='unsupported'
    elif test "$ax_cv_prog_fc_openacc" = ''; then
      ax_cv_prog_fc_openacc='none needed'
    fi
    rm -f penmp mp
  fi
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5
printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; }
  # Only a real compiler option may become a flag; 'not found' is listed
  # too so that a value cached by an earlier run cannot leak through.
  case $ax_cv_prog_fc_openacc in
    'not found' | unsupported | 'none needed') ;;
    *) OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc" ;;
  esac
  ac_ext=c
  ac_cpp='$CPP $CPPFLAGS'
  ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
  ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
  ac_compiler_gnu=$ac_cv_c_compiler_gnu
fi

FCOPENACC="$OPENACC_FCFLAGS";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-ccopenacc was given.
if test ${with_ccopenacc+y}
then :
withval=$with_ccopenacc;
CCOPENACC="${withval} ${CCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-cxxopenacc was given.
if test ${with_cxxopenacc+y}
then :
withval=$with_cxxopenacc;
CXXOPENACC="${withval} ${CXXOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
# Check whether --with-fcopenacc was given.
if test ${with_fcopenacc+y}
then :
withval=$with_fcopenacc;
FCOPENACC="${withval} ${FCOPENACC}"
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
else $as_nop
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
fi
CCOPENACC="$CCOPENACC $EXTRAOPENACC";
CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
FCOPENACC="$FCOPENACC $EXTRAOPENACC";
OACCD=oaccd;
OACCLD=oaccld;
#FCOPT="$FCOPT $FCOPENACC";
#CCOPT="$CCOPT $CCOPENACC"
#CXXOPT="$CXXOPT $CXXOPENACC"
#FLINK="$FLINK $FCOPENACC";
fi
###############################################################################
# Look for a LIBRSB installation in the directory held by pac_cv_librsb_dir.
# The check is purely file based: the installation counts as present when
# $LIBRSB_DIR/librsb.a is a regular file.
LIBRSB_DIR="$pac_cv_librsb_dir";
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5
printf %s "checking for LIBRSB install dir... " >&6; }
# Both branches of this case are empty, so absolute and relative paths are
# currently treated the same way.
case $LIBRSB_DIR in
/*) ;;
*) esac
pac_cv_status_file="$LIBRSB_DIR/librsb.a"
if test ! -f "$pac_cv_status_file" ; then
# A missing library is reported but is not fatal (the error call below is
# commented out).
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
printf "%s\n" "no" >&6; }
#AC_MSG_ERROR([Could not find an installation in $LIBRSB_DIR.])
else
{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5
printf "%s\n" "$LIBRSB_DIR" >&6; }
# Library found: set the two RSB target names.
RSBTARGETLIB=rsbd;
RSBTARGETOBJ=rsbobj;
fi
@ -10878,7 +11332,7 @@ fi
LIBDIR=lib LIBDIR=lib
BASELIBNAME=libpsb_base.a BASELIBNAME=libpsb_base.a
PRECLIBNAME=libpsb_prec.a PRECLIBNAME=libpsb_prec.a
METHDLIBNAME=libpsb_krylov.a METHDLIBNAME=libpsb_linsolve.a
UTILLIBNAME=libpsb_util.a UTILLIBNAME=libpsb_util.a
############################################################################### ###############################################################################
@ -10928,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a
PSBLASRULES=' PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they? # These should be portable rules, arent they?
@ -10961,6 +11415,22 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
@ -12299,6 +12769,11 @@ fi
CUDA : ${HAVE_CUDA} CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc} CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -12332,6 +12807,11 @@ printf "%s\n" "$as_me:
CUDA : ${HAVE_CUDA} CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc} CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -12348,6 +12828,16 @@ printf "%s\n" "$as_me:
If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise
type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}. type ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}.
" >&6;} " >&6;}
# Warn when OpenACC is enabled but no Fortran OpenACC flag is available.
if test x"${pac_cv_openacc}" = x"yes" ; then
  # FCOPENACC may consist of blanks only (it is built by joining flag
  # strings with a space), so test it with all whitespace removed.
  pac_fcopenacc_flags=`printf '%s' ${FCOPENACC}`
  if test x"${pac_fcopenacc_flags}" = x ; then
    { printf "%s\n" "$as_me:${as_lineno-$LINENO}:
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= " >&5
printf "%s\n" "$as_me:
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= " >&6;}
  fi
fi
############################################################################### ###############################################################################

@ -483,7 +483,7 @@ fi
############################################################################## ##############################################################################
BASEMODNAME=psb_base_mod BASEMODNAME=psb_base_mod
PRECMODNAME=psb_prec_mod PRECMODNAME=psb_prec_mod
METHDMODNAME=psb_krylov_mod METHDMODNAME=psb_linsolve_mod
UTILMODNAME=psb_util_mod UTILMODNAME=psb_util_mod
if test "X$psblas_cv_fc" == X"cray" if test "X$psblas_cv_fc" == X"cray"
@ -493,7 +493,7 @@ then
FIFLAG="-I" FIFLAG="-I"
BASEMODNAME=PSB_BASE_MOD BASEMODNAME=PSB_BASE_MOD
PRECMODNAME=PSB_PREC_MOD PRECMODNAME=PSB_PREC_MOD
METHDMODNAME=PSB_KRYLOV_MOD METHDMODNAME=PSB_LINSOLVE_MOD
UTILMODNAME=PSB_UTIL_MOD UTILMODNAME=PSB_UTIL_MOD
else else
@ -795,16 +795,19 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
fi fi
PAC_ARG_CUDA()
if test "x$pac_cv_cuda" == "xyes"; then
PAC_CHECK_CUDA() PAC_CHECK_CUDA()
if test "x$pac_cv_have_cuda" == "xyes"; then if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_CHECK_CUDA_VERSION() PAC_CHECK_CUDA_VERSION()
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
dnl PAC_CHECK_SPGPU() dnl PAC_CHECK_SPGPU()
HAVE_CUDA="yes"; HAVE_CUDA="yes";
CUDA_VERSION="$pac_cv_cuda_version";
CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
SPGPU_LIBS="-lspgpu"; SPGPU_LIBS="-lspgpu";
CUDAD=cudad; CUDAD=cudad;
CUDALD=cudald; CUDALD=cudald;
@ -814,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
PAC_ARG_WITH_CUDACC() PAC_ARG_WITH_CUDACC()
if test "x$pac_cv_cudacc" == "x"; then if test "x$pac_cv_cudacc" == "x"; then
pac_cv_cudacc="50,60,70,75"; pac_cv_cudacc="50,60,70,75,80,86";
CUDA_CC="$pac_cv_cudacc"; CUDA_CC="$pac_cv_cudacc";
fi fi
if (( $pac_cv_cuda_version >= 11070 )) if (( $pac_cv_cuda_version >= 11070 ))
@ -826,27 +829,72 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc"; CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
done done
if test "x$pac_cv_cuda_version" != "xunknown"; then if test "x$pac_cv_cuda_version" != "xunknown"; then
CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" FCUDEFINES=" ${CUDA_DEFINES}"
CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}" CCUDEFINES=" ${CUDA_DEFINES}"
CXXCUDEFINES=" ${CUDA_DEFINES}"
fi fi
fi fi
if test "x$pac_cv_ipk_size" != "x4"; then if test "x$pac_cv_ipk_size" != "x4"; then
AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU]) AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size, disabling CUDA/SPGPU])
pac_cv_cuda="no";
HAVE_CUDA="no"; HAVE_CUDA="no";
CUDA_CC=""; CUDA_CC="";
SPGPU_LIBS=""; SPGPU_LIBS="";
CUDAD=""; CUDAD="";
CUDALD=""; CUDALD="";
CUDEFINES=""; CUDEFINES="";
CUDA_DEFINES="";
CUDA_INCLUDES=""; CUDA_INCLUDES="";
CUDA_LIBS=""; CUDA_LIBS="";
FCUDEFINES="";
CCUDEFINES="";
CXXCUDEFINES="";
fi
fi fi
dnl OpenACC configuration: decide whether OpenACC is wanted and then
dnl collect the per-language compiler options into CCOPENACC CXXOPENACC
dnl and FCOPENACC. OACCD and OACCLD are set only when OpenACC is enabled.
PAC_ARG_OPENACC()
if test x"$pac_cv_openacc" = x"yes" ; then
   PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC)

   dnl The ax_cv_prog_*_openacc results can hold a status word instead of
   dnl a compiler option. Only a real option is copied into the flags.
   AX_C_OPENACC()
   AS_CASE(["$ax_cv_prog_c_openacc"],
           ["not found"|"unsupported"|"none needed"], [CCOPENACC=""],
           [CCOPENACC="$ax_cv_prog_c_openacc"])
   AX_CXX_OPENACC()
   AS_CASE(["$ax_cv_prog_cxx_openacc"],
           ["not found"|"unsupported"|"none needed"], [CXXOPENACC=""],
           [CXXOPENACC="$ax_cv_prog_cxx_openacc"])
   AX_FC_OPENACC()
   AS_CASE(["$ax_cv_prog_fc_openacc"],
           ["not found"|"unsupported"|"none needed"], [FCOPENACC=""],
           [FCOPENACC="$ax_cv_prog_fc_openacc"])

   dnl User supplied additions are prepended to the detected options.
   PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)
   PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
   PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)

   dnl Append the common extra options only when there are some, so that
   dnl an empty flag variable stays empty and can be detected later.
   if test "x$EXTRAOPENACC" != "x" ; then
      CCOPENACC="$CCOPENACC $EXTRAOPENACC";
      CXXOPENACC="$CXXOPENACC $EXTRAOPENACC";
      FCOPENACC="$FCOPENACC $EXTRAOPENACC";
   fi
   OACCD=oaccd;
   OACCLD=oaccld;

   #FCOPT="$FCOPT $FCOPENACC";
   #CCOPT="$CCOPT $CCOPENACC"
   #CXXOPT="$CXXOPT $CXXOPENACC"
   #FLINK="$FLINK $FCOPENACC";
fi
############################################################################### ###############################################################################
PAC_ARG_WITH_LIBRSB dnl PAC_ARG_WITH_LIBRSB()
LIBRSB_DIR="$pac_cv_librsb_dir"; LIBRSB_DIR="$pac_cv_librsb_dir";
AC_MSG_CHECKING([for LIBRSB install dir]) AC_MSG_CHECKING([for LIBRSB install dir])
case $LIBRSB_DIR in case $LIBRSB_DIR in
@ -878,7 +926,7 @@ fi
LIBDIR=lib LIBDIR=lib
BASELIBNAME=libpsb_base.a BASELIBNAME=libpsb_base.a
PRECLIBNAME=libpsb_prec.a PRECLIBNAME=libpsb_prec.a
METHDLIBNAME=libpsb_krylov.a METHDLIBNAME=libpsb_linsolve.a
UTILLIBNAME=libpsb_util.a UTILLIBNAME=libpsb_util.a
############################################################################### ###############################################################################
@ -928,9 +976,9 @@ AC_SUBST(FINCLUDES)
PSBLASRULES=' PSBLASRULES='
PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS) PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
CXXDEFINES=$(PSBCXXDEFINES) $(CUDA_DEFINES) CXXDEFINES=$(PSBCXXDEFINES)
CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES) CDEFINES=$(PSBCDEFINES)
FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES) FDEFINES=$(PSBFDEFINES)
# These should be portable rules, arent they? # These should be portable rules, arent they?
@ -952,6 +1000,12 @@ AC_SUBST(PRECLIBNAME)
AC_SUBST(METHDLIBNAME) AC_SUBST(METHDLIBNAME)
AC_SUBST(UTILLIBNAME) AC_SUBST(UTILLIBNAME)
AC_SUBST(METISINCFILE) AC_SUBST(METISINCFILE)
AC_SUBST(OACCD)
AC_SUBST(OACCLD)
AC_SUBST(FCOPENACC)
AC_SUBST(CCOPENACC)
AC_SUBST(CXXOPENACC)
AC_SUBST(SPGPU_FLAGS) AC_SUBST(SPGPU_FLAGS)
AC_SUBST(SPGPU_LIBS) AC_SUBST(SPGPU_LIBS)
dnl AC_SUBST(SPGPU_DIR) dnl AC_SUBST(SPGPU_DIR)
@ -960,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
AC_SUBST(EXTRALDLIBS) AC_SUBST(EXTRALDLIBS)
AC_SUBST(CUDA_DIR) AC_SUBST(CUDA_DIR)
AC_SUBST(CUDA_DEFINES) AC_SUBST(CUDA_DEFINES)
AC_SUBST(FCUDEFINES)
AC_SUBST(CCUDEFINES)
AC_SUBST(CXXCUDEFINES)
AC_SUBST(CUDA_INCLUDES) AC_SUBST(CUDA_INCLUDES)
AC_SUBST(CUDA_LIBS) AC_SUBST(CUDA_LIBS)
AC_SUBST(CUDA_VERSION) AC_SUBST(CUDA_VERSION)
@ -976,7 +1033,6 @@ AC_SUBST(LIBRSB_DIR)
AC_SUBST(LIBRSB_DEFINES) AC_SUBST(LIBRSB_DEFINES)
AC_SUBST(LRSB) AC_SUBST(LRSB)
############################################################################### ###############################################################################
# the following files will be created by Automake # the following files will be created by Automake
@ -1004,6 +1060,11 @@ AC_MSG_NOTICE([
CUDA : ${HAVE_CUDA} CUDA : ${HAVE_CUDA}
CUDA_CC : ${pac_cv_cudacc} CUDA_CC : ${pac_cv_cudacc}
OPENACC : ${pac_cv_openacc}
FCOPENACC : ${FCOPENACC}
OACCD : ${OACCD}
OACCLD : ${OACCLD}
BLAS : ${BLAS_LIBS} BLAS : ${BLAS_LIBS}
METIS usable : ${psblas_cv_have_metis} METIS usable : ${psblas_cv_have_metis}
@ -1023,6 +1084,13 @@ dnl Note : we should use LDLIBS sooner or later!
dnl To install the program and its documentation, run 'make install' if you are root, dnl To install the program and its documentation, run 'make install' if you are root,
dnl or run 'su -c "make install"' if you are not root. dnl or run 'su -c "make install"' if you are not root.
]) ])
dnl Warn when OpenACC is enabled but no Fortran OpenACC flag is available.
if test x"${pac_cv_openacc}" = x"yes" ; then
  # FCOPENACC may consist of blanks only, so test it with all whitespace removed.
  pac_fcopenacc_flags=`printf '%s' ${FCOPENACC}`
  if test x"${pac_fcopenacc_flags}" = x ; then
    AC_MSG_NOTICE([
WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
You may want to rerun configure with --with-fcopenacc= ])
  fi
fi
############################################################################### ###############################################################################

@ -141,3 +141,12 @@ spgpuclean:
$(MAKE) -C spgpu clean $(MAKE) -C spgpu clean
veryclean: clean veryclean: clean
# Suffix rules for this directory. Each rule adds the language specific
# CUDA define set (CCUDEFINES, FCUDEFINES or CXXCUDEFINES) to the compile
# command line.
# C sources.
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
# Fortran sources with the .f90 suffix; this rule does not pass $(FDEFINES).
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
# Fortran sources with the .F90 suffix; $(FDEFINES) is passed as well.
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
# C++ sources.
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
} }
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i); return(i);
} }
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
return(i); return(i);
} }
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;

@ -37,6 +37,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedFloatComplex(void *, void **, int, cuFloatComplex); int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec); int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
int hfirst, void* host_values, int indexBase, cuFloatComplex beta); int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA); int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA); int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB); int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY); int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);

@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
status = cublasCgemv(handle, trans, m,k, status = cublasCgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1, (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1); (const cuComplex *) beta, y->v_,1);
} else { } else {
status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k, status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_); (const cuComplex *) beta, y->v_,y->pitch_);
} }
if (status == CUBLAS_STATUS_SUCCESS) if (status == CUBLAS_STATUS_SUCCESS)
@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
/* Note: the M,N,K choices according to TRANS have already been handled in the caller */ /* Note: the M,N,K choices according to TRANS have already been handled in the caller */
if (n == 1) { if (n == 1) {
status = cublasZgemv(handle, trans, m,k, status = cublasZgemv(handle, trans, m,k,
alpha, devMat->cM,devMat->pitch, x->v_,1, (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
beta, y->v_,1); (const cuDoubleComplex *) beta, y->v_,1);
} else { } else {
status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k, status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_, (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
beta, y->v_,y->pitch_); (const cuDoubleComplex *) beta, y->v_,y->pitch_);
} }
if (status == CUBLAS_STATUS_SUCCESS) if (status == CUBLAS_STATUS_SUCCESS)

@ -220,7 +220,8 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_); spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,
devVecA->count_,devVecB->pitch_);
return(i); return(i);
} }

@ -35,6 +35,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedDouble(void *, void **, int, double); int registerMappedDouble(void *, void **, int, double);
int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec); int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);

@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
T_CSRGDeviceMat *cMat= Matrix->mat; T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseFillMode_t mode=type; cusparseFillMode_t mode=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_FILL_MODE, CUSPARSE_SPMAT_FILL_MODE,
(const void*) &mode, (const void*) &mode,
sizeof(cusparseFillMode_t))); sizeof(cusparseFillMode_t)));
@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
{ {
T_CSRGDeviceMat *cMat= Matrix->mat; T_CSRGDeviceMat *cMat= Matrix->mat;
cusparseDiagType_t cutype=type; cusparseDiagType_t cutype=type;
CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr, CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
CUSPARSE_SPMAT_DIAG_TYPE, CUSPARSE_SPMAT_DIAG_TYPE,
(const void*) &cutype, (const void*) &cutype,
sizeof(cusparseDiagType_t))); sizeof(cusparseDiagType_t)));

@ -295,3 +295,12 @@ lib: objs
clean: clean:
/bin/rm -f $(OBJS) /bin/rm -f $(OBJS)
# Explicit suffix rules for building object files in this directory.
# NOTE(review): these only take effect if the suffixes are known to make
# (e.g. through a .SUFFIXES declaration elsewhere) -- confirm.

# C sources: compiled with $(CC) using the C options, defines and includes.
.c.o:
$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
# Fortran sources with the .f90 suffix: compiled without $(FDEFINES).
.f90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
# Fortran sources with the .F90 suffix: same as .f90 plus $(FDEFINES).
.F90.o:
$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
# C++ sources: compiled with $(CXX) and the CXX-specific option variables.
.cpp.o:
$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@

@ -35,6 +35,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedInt(void *, void **, int, int); int registerMappedInt(void *, void **, int, int);
int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec); int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);

@ -813,18 +813,6 @@ contains
call x%set_dev() call x%set_dev()
end subroutine c_cuda_set_scal end subroutine c_cuda_set_scal
!!$
!!$ subroutine c_cuda_set_vect(x,val)
!!$ class(psb_c_vect_cuda), intent(inout) :: x
!!$ complex(psb_spk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_c_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine c_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_c_base_vect_type), intent(inout) :: y class(psb_c_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = czero res = czero
@ -844,9 +831,6 @@ contains
! TYPE psb_c_vect ! TYPE psb_c_vect
! !
select type(yy => y) select type(yy => y)
type is (psb_c_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_c_vect_cuda) type is (psb_c_vect_cuda)
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync() if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default class default
! y%sync is done in dot_a ! y%sync is done in dot_a
call x%sync() if (x%is_dev()) call x%sync()
res = y%dot(n,x%v) res = y%dot(n,x%v)
end select end select
@ -870,10 +854,10 @@ contains
complex(psb_spk_), intent(in) :: y(:) complex(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
complex(psb_spk_) :: res complex(psb_spk_) :: res
complex(psb_spk_), external :: ddot complex(psb_spk_), external :: cdot
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
res = ddot(n,y,1,x%v,1) res = cdot(n,y,1,x%v,1)
end function c_cuda_dot_a end function c_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_c_cuda_multivect_mod
end type psb_c_multivect_cuda end type psb_c_multivect_cuda
public :: psb_c_multivect_cuda public :: psb_c_multivect_cuda
private :: constructor private :: mconstructor
interface psb_c_multivect_cuda interface psb_c_multivect_cuda
module procedure constructor module procedure mconstructor
end interface end interface
contains contains
function constructor(x) result(this) function mconstructor(x) result(this)
complex(psb_spk_) :: x(:,:) complex(psb_spk_) :: x(:,:)
type(psb_c_multivect_cuda) :: this type(psb_c_multivect_cuda) :: this
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x this%v = x
call this%asb(size(x,1),size(x,2),info) call this%asb(size(x,1),size(x,2),info)
end function constructor end function mconstructor
!!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -813,18 +813,6 @@ contains
call x%set_dev() call x%set_dev()
end subroutine d_cuda_set_scal end subroutine d_cuda_set_scal
!!$
!!$ subroutine d_cuda_set_vect(x,val)
!!$ class(psb_d_vect_cuda), intent(inout) :: x
!!$ real(psb_dpk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_d_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine d_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_d_base_vect_type), intent(inout) :: y class(psb_d_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_dpk_) :: res real(psb_dpk_) :: res
real(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = dzero res = dzero
@ -844,9 +831,6 @@ contains
! TYPE psb_d_vect ! TYPE psb_d_vect
! !
select type(yy => y) select type(yy => y)
type is (psb_d_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_d_vect_cuda) type is (psb_d_vect_cuda)
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync() if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default class default
! y%sync is done in dot_a ! y%sync is done in dot_a
call x%sync() if (x%is_dev()) call x%sync()
res = y%dot(n,x%v) res = y%dot(n,x%v)
end select end select
@ -1393,14 +1377,14 @@ module psb_d_cuda_multivect_mod
end type psb_d_multivect_cuda end type psb_d_multivect_cuda
public :: psb_d_multivect_cuda public :: psb_d_multivect_cuda
private :: constructor private :: mconstructor
interface psb_d_multivect_cuda interface psb_d_multivect_cuda
module procedure constructor module procedure mconstructor
end interface end interface
contains contains
function constructor(x) result(this) function mconstructor(x) result(this)
real(psb_dpk_) :: x(:,:) real(psb_dpk_) :: x(:,:)
type(psb_d_multivect_cuda) :: this type(psb_d_multivect_cuda) :: this
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x this%v = x
call this%asb(size(x,1),size(x,2),info) call this%asb(size(x,1),size(x,2),info)
end function constructor end function mconstructor
!!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -795,18 +795,6 @@ contains
call x%set_dev() call x%set_dev()
end subroutine i_cuda_set_scal end subroutine i_cuda_set_scal
!!$
!!$ subroutine i_cuda_set_vect(x,val)
!!$ class(psb_i_vect_cuda), intent(inout) :: x
!!$ integer(psb_ipk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_i_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine i_cuda_set_vect
@ -949,14 +937,14 @@ module psb_i_cuda_multivect_mod
end type psb_i_multivect_cuda end type psb_i_multivect_cuda
public :: psb_i_multivect_cuda public :: psb_i_multivect_cuda
private :: constructor private :: mconstructor
interface psb_i_multivect_cuda interface psb_i_multivect_cuda
module procedure constructor module procedure mconstructor
end interface end interface
contains contains
function constructor(x) result(this) function mconstructor(x) result(this)
integer(psb_ipk_) :: x(:,:) integer(psb_ipk_) :: x(:,:)
type(psb_i_multivect_cuda) :: this type(psb_i_multivect_cuda) :: this
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
@ -964,7 +952,7 @@ contains
this%v = x this%v = x
call this%asb(size(x,1),size(x,2),info) call this%asb(size(x,1),size(x,2),info)
end function constructor end function mconstructor
!!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -813,18 +813,6 @@ contains
call x%set_dev() call x%set_dev()
end subroutine s_cuda_set_scal end subroutine s_cuda_set_scal
!!$
!!$ subroutine s_cuda_set_vect(x,val)
!!$ class(psb_s_vect_cuda), intent(inout) :: x
!!$ real(psb_spk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_s_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine s_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_s_base_vect_type), intent(inout) :: y class(psb_s_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
real(psb_spk_), external :: ddot
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = szero res = szero
@ -844,9 +831,6 @@ contains
! TYPE psb_s_vect ! TYPE psb_s_vect
! !
select type(yy => y) select type(yy => y)
type is (psb_s_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_s_vect_cuda) type is (psb_s_vect_cuda)
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync() if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default class default
! y%sync is done in dot_a ! y%sync is done in dot_a
call x%sync() if (x%is_dev()) call x%sync()
res = y%dot(n,x%v) res = y%dot(n,x%v)
end select end select
@ -870,10 +854,10 @@ contains
real(psb_spk_), intent(in) :: y(:) real(psb_spk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
real(psb_spk_) :: res real(psb_spk_) :: res
real(psb_spk_), external :: ddot real(psb_spk_), external :: sdot
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
res = ddot(n,y,1,x%v,1) res = sdot(n,y,1,x%v,1)
end function s_cuda_dot_a end function s_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_s_cuda_multivect_mod
end type psb_s_multivect_cuda end type psb_s_multivect_cuda
public :: psb_s_multivect_cuda public :: psb_s_multivect_cuda
private :: constructor private :: mconstructor
interface psb_s_multivect_cuda interface psb_s_multivect_cuda
module procedure constructor module procedure mconstructor
end interface end interface
contains contains
function constructor(x) result(this) function mconstructor(x) result(this)
real(psb_spk_) :: x(:,:) real(psb_spk_) :: x(:,:)
type(psb_s_multivect_cuda) :: this type(psb_s_multivect_cuda) :: this
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x this%v = x
call this%asb(size(x,1),size(x,2),info) call this%asb(size(x,1),size(x,2),info)
end function constructor end function mconstructor
!!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -813,18 +813,6 @@ contains
call x%set_dev() call x%set_dev()
end subroutine z_cuda_set_scal end subroutine z_cuda_set_scal
!!$
!!$ subroutine z_cuda_set_vect(x,val)
!!$ class(psb_z_vect_cuda), intent(inout) :: x
!!$ complex(psb_dpk_), intent(in) :: val(:)
!!$ integer(psb_ipk_) :: nr
!!$ integer(psb_ipk_) :: info
!!$
!!$ if (x%is_dev()) call x%sync()
!!$ call x%psb_z_base_vect_type%set_vect(val)
!!$ call x%set_host()
!!$
!!$ end subroutine z_cuda_set_vect
@ -834,7 +822,6 @@ contains
class(psb_z_base_vect_type), intent(inout) :: y class(psb_z_base_vect_type), intent(inout) :: y
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
res = zzero res = zzero
@ -844,9 +831,6 @@ contains
! TYPE psb_z_vect ! TYPE psb_z_vect
! !
select type(yy => y) select type(yy => y)
type is (psb_z_base_vect_type)
if (x%is_dev()) call x%sync()
res = ddot(n,x%v,1,yy%v,1)
type is (psb_z_vect_cuda) type is (psb_z_vect_cuda)
if (x%is_host()) call x%sync() if (x%is_host()) call x%sync()
if (yy%is_host()) call yy%sync() if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
class default class default
! y%sync is done in dot_a ! y%sync is done in dot_a
call x%sync() if (x%is_dev()) call x%sync()
res = y%dot(n,x%v) res = y%dot(n,x%v)
end select end select
@ -870,10 +854,10 @@ contains
complex(psb_dpk_), intent(in) :: y(:) complex(psb_dpk_), intent(in) :: y(:)
integer(psb_ipk_), intent(in) :: n integer(psb_ipk_), intent(in) :: n
complex(psb_dpk_) :: res complex(psb_dpk_) :: res
complex(psb_dpk_), external :: ddot complex(psb_dpk_), external :: zdot
if (x%is_dev()) call x%sync() if (x%is_dev()) call x%sync()
res = ddot(n,y,1,x%v,1) res = zdot(n,y,1,x%v,1)
end function z_cuda_dot_a end function z_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_z_cuda_multivect_mod
end type psb_z_multivect_cuda end type psb_z_multivect_cuda
public :: psb_z_multivect_cuda public :: psb_z_multivect_cuda
private :: constructor private :: mconstructor
interface psb_z_multivect_cuda interface psb_z_multivect_cuda
module procedure constructor module procedure mconstructor
end interface end interface
contains contains
function constructor(x) result(this) function mconstructor(x) result(this)
complex(psb_dpk_) :: x(:,:) complex(psb_dpk_) :: x(:,:)
type(psb_z_multivect_cuda) :: this type(psb_z_multivect_cuda) :: this
integer(psb_ipk_) :: info integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
this%v = x this%v = x
call this%asb(size(x,1),size(x,2),info) call this%asb(size(x,1),size(x,2),info)
end function constructor end function mconstructor
!!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y) !!$ subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y)

@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
{ {
#endif #endif
#ifdef ASSUME_LOCK_SYNC_PARALLELISM #ifdef ASSUME_LOCK_SYNC_PARALLELISM
volatile float* vsSum = sSum; volatile float* vsSum = sSum;
vsSum[threadIdx.x] = res; vsSum[threadIdx.x] = res;

@ -220,7 +220,8 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_); spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,
devVecA->count_,devVecB->pitch_);
return(i); return(i);
} }

@ -35,6 +35,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedFloat(void *, void **, int, float); int registerMappedFloat(void *, void **, int, float);
int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec); int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);

@ -34,6 +34,7 @@
#include "cuda_runtime.h" #include "cuda_runtime.h"
//#include "common.h" //#include "common.h"
//#include "cintrf.h" //#include "cintrf.h"
#include "cuda_util.h"
#include <complex.h> #include <complex.h>
struct MultiVectDevice struct MultiVectDevice

@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
} }
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i); return(i);
} }
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
return(i); return(i);
} }
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA) int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
{ int i=0; { int i=0;
spgpuHandle_t handle=psb_cudaGetHandle(); spgpuHandle_t handle=psb_cudaGetHandle();
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -223,7 +223,8 @@ int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
return(i); return(i);
} }
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB) int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
void* devMultiVecA, void* devMultiVecB)
{int i=0; {int i=0;
struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA; struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB; struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;

@ -37,6 +37,7 @@
#include "vectordev.h" #include "vectordev.h"
#include "cuda_runtime.h" #include "cuda_runtime.h"
#include "core.h" #include "core.h"
#include "vector.h"
int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex); int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec); int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
int indexBase, cuDoubleComplex beta); int indexBase, cuDoubleComplex beta);
int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA); int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA); int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
void* devVecA, void* devVecB); void* devVecA, void* devVecB);

@ -1,7 +1,7 @@
all: guide all: guide
guide: guide:
cd src && $(MAKE) cd src && $(MAKE) clean all
doxy: doxy:
doxygen doxypsb doxygen doxypsb

@ -52,7 +52,7 @@ PROJECT_LOGO =
# If a relative path is entered, it will be relative to the location # If a relative path is entered, it will be relative to the location
# where doxygen was started. If left blank the current directory will be used. # where doxygen was started. If left blank the current directory will be used.
OUTPUT_DIRECTORY = ../../psblas-3.4-doxygen OUTPUT_DIRECTORY = ../../psblas-3.9-doxygen
# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
# 4096 sub-directories (in 2 levels) under the output directory of each output # 4096 sub-directories (in 2 levels) under the output directory of each output

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 58 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

@ -10,16 +10,16 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 91--><p class="noindent" ><span <!--l. 99--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br class="pplb7t-x-x-172">PSBLAS</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span class="newline" /> <span
class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br class="pplb7t-">Salvatore Filippone</span><br
class="newline" /><span class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br class="pplb7t-">Alfredo Buttari </span><br
class="newline" />Software version: 3.9.0<br class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024 class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a <br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span> href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span> href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span> href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a <br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span> href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
</div> </div>

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

@ -1,33 +1,62 @@
/* start css.sty */ /* start css.sty */
.cmr-7{font-size:70%;} .pplb7t-x-x-172{font-size:172%;font-weight: bold;}
.cmmi-5{font-size:50%;font-style: italic;} .pplb7t-x-x-172{font-weight: bold;}
.cmmi-7{font-size:70%;font-style: italic;} .pplb7t-x-x-172{font-weight: bold;}
.cmmi-10{font-style: italic;} .pplri7t-{font-style: italic;}
.cmsy-7{font-size:70%;} .pplri7t-{font-style: italic;}
.cmbx-12x-x-144{font-size:172%; font-weight: bold;} .pplri7t-x-x-120{font-size:120%;font-style: italic;}
.cmbx-12x-x-144{ font-weight: bold;} .pplri7t-x-x-120{font-style: italic;}
.cmbx-12x-x-144{ font-weight: bold;} .pplb7t-{font-weight: bold;}
.cmti-10{ font-style: italic;} .pplb7t-{font-weight: bold;}
.cmti-12{font-size:120%; font-style: italic;} .pplb7t-{font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmbx-10{ font-weight: bold;}
.cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;}
.cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;}
.cmtt-10{font-family: monospace,monospace;} .cmtt-10{font-family: monospace,monospace;}
.cmr-9{font-size:90%;} .pplr7t-x-x-76{font-size:76%;}
.cmr-8{font-size:80%;} .zplmr7m-{font-style: italic;}
.cmbx-12{font-size:120%; font-weight: bold;} .zplmr7m-{font-style: italic;}
.cmbx-12{ font-weight: bold;} .zplmr7m-{font-style: italic;}
.cmbx-12{ font-weight: bold;} .zplmr7m-{font-style: italic;}
.zplmr7m-{font-style: italic;}
.zplmr7m-x-x-76{font-size:76%;font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-76{font-style: italic;}
.zplmr7m-x-x-60{font-size:60%;font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7m-x-x-60{font-style: italic;}
.zplmr7y-x-x-76{font-size:76%;}
.zplmr7t-x-x-76{font-size:76%;}
.pplr7t-x-x-90{font-size:90%;}
.pplr7t-x-x-80{font-size:80%;}
.pplb7t-x-x-120{font-size:120%;font-weight: bold;}
.pplb7t-x-x-120{font-weight: bold;}
.pplb7t-x-x-120{font-weight: bold;}
.cmtt-8{font-size:80%;font-family: monospace,monospace;} .cmtt-8{font-size:80%;font-family: monospace,monospace;}
.cmtt-8{font-family: monospace,monospace;} .cmtt-8{font-family: monospace,monospace;}
.cmtt-8{font-family: monospace,monospace;} .cmtt-8{font-family: monospace,monospace;}
.cmtt-9{font-size:90%;font-family: monospace,monospace;} .cmtt-9{font-size:90%;font-family: monospace,monospace;}
.cmtt-9{font-family: monospace,monospace;} .cmtt-9{font-family: monospace,monospace;}
.cmtt-9{font-family: monospace,monospace;} .cmtt-9{font-family: monospace,monospace;}
.cmmi-8{font-size:80%;font-style: italic;} .pplr7t-x-x-70{font-size:70%;}
.zplmr7m-x-x-90{font-size:90%;font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7m-x-x-90{font-style: italic;}
.zplmr7y-x-x-90{font-size:90%;}
.zplmr7m-x-x-80{font-size:80%;font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7m-x-x-80{font-style: italic;}
.zplmr7t-x-x-80{font-size:80%;}
.pplrc7t-x-x-90{font-size:90%;}
.small-caps{font-variant: small-caps; }
p{margin-top:0;margin-bottom:0} p{margin-top:0;margin-bottom:0}
p.indent{text-indent:0;} p.indent{text-indent:0;}
p + p{margin-top:1em;} p + p{margin-top:1em;}
@ -158,5 +187,11 @@ pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top
pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; } pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; } pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
.lstinputlisting .label{margin-right:0.5em;} .lstinputlisting .label{margin-right:0.5em;}
#TBL-24-1{border-left: 1px solid black;}
#TBL-24-1{border-right:1px solid black;}
#TBL-24-2{border-right:1px solid black;}
#TBL-24-3{border-right:1px solid black;}
#TBL-24-4{border-right:1px solid black;}
#TBL-24-5{border-right:1px solid black;}
/* end css.sty */ /* end css.sty */

@ -10,16 +10,16 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<!--l. 91--><p class="noindent" ><span <!--l. 99--><p class="noindent" ><span
class="cmbx-12x-x-144">PSBLAS</span><br class="pplb7t-x-x-172">PSBLAS</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br
class="newline" /> <span class="newline" /> <span
class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br
class="newline" /> <span class="newline" /> <span
class="cmbx-10">Salvatore Filippone</span><br class="pplb7t-">Salvatore Filippone</span><br
class="newline" /><span class="newline" /><span
class="cmbx-10">Alfredo Buttari </span><br class="pplb7t-">Alfredo Buttari </span><br
class="newline" />Software version: 3.9.0<br class="newline" />Software version: 3.9.0<br
class="newline" />Aug 1st, 2024 class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
<br /> &#x00A0;<span class="sectionToc" >10 <a <br /> &#x00A0;<span class="sectionToc" >10 <a
href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span> href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
<br /> &#x00A0;<span class="sectionToc" >11 <a <br /> &#x00A0;<span class="sectionToc" >11 <a
href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span> href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
<br /> &#x00A0;<span class="sectionToc" >12 <a <br /> &#x00A0;<span class="sectionToc" >12 <a
href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span> href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
<br /> &#x00A0;<span class="sectionToc" >13 <a <br /> &#x00A0;<span class="sectionToc" >13 <a
href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span> href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
<br /> &#x00A0;<span class="likesectionToc" ><a <br /> &#x00A0;<span class="likesectionToc" ><a
href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span> href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
</div> </div>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.2 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

@ -10,10 +10,10 @@
<link rel="stylesheet" type="text/css" href="userhtml.css"> <link rel="stylesheet" type="text/css" href="userhtml.css">
</head><body </head><body
> >
<div class="footnote-text"> <div class="footnote-text">
<!--l. 72--><p class="indent" > <span class="footnote-mark"><a <!--l. 72--><p class="indent" > <span class="footnote-mark"><a
id="fn4x0"><a id="fn4x0"><a
id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span id="x16-136002x10.1"></a> <sup class="textsuperscript">4</sup></a></span><span
class="cmr-8">The string is case-insensitive</span></div> class="pplr7t-x-x-80">The string is case-insensitive</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 968 B

After

Width:  |  Height:  |  Size: 1021 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

@ -13,8 +13,11 @@
<div class="footnote-text"> <div class="footnote-text">
<!--l. 53--><p class="noindent" ><span class="footnote-mark"><a <!--l. 53--><p class="noindent" ><span class="footnote-mark"><a
id="fn5x0"><a id="fn5x0"><a
id="x18-143004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span id="x18-144004x11.1"></a> <sup class="textsuperscript">5</sup></a></span><span
class="cmr-8">Note: the implementation is for </span><span class="pplr7t-x-x-80">Note: the implementation is for </span><span
class="cmmi-8">FCG</span><span class="zplmr7m-x-x-80">FCG</span><span
class="cmr-8">(1).</span></div> class="zplmr7t-x-x-80">(</span><span
class="pplr7t-x-x-80">1</span><span
class="zplmr7t-x-x-80">)</span><span
class="pplr7t-x-x-80">.</span></div>
</body></html> </body></html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 7.5 KiB

After

Width:  |  Height:  |  Size: 8.3 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 970 B

After

Width:  |  Height:  |  Size: 985 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 420 B

After

Width:  |  Height:  |  Size: 399 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 710 B

After

Width:  |  Height:  |  Size: 700 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 1.8 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 2.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.0 KiB

After

Width:  |  Height:  |  Size: 1.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1016 B

After

Width:  |  Height:  |  Size: 1.0 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 2.1 KiB

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.8 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.6 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.0 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 KiB

After

Width:  |  Height:  |  Size: 1.7 KiB

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save