Merge branch 'cmake2' into cmake

2 months ago · ef71a32484
parent 5b81cbac12 61976812be
commit ef71a32484
375 changed files with 40725 additions and 19657 deletions
--- a/.VERSION
+++ b/.VERSION
@ -0,0 +1,12 @@
+$Format:%d%n%n$
+# Fall back version, probably last release:
+3.9.0
+
+# PSBLAS version file.
+#
+# Release archive created from commit:
+# $Format:%H %d$
+# $Format:Created on %ci by %cN, and$
+# $Format:signed by %GS using %GK.$
+# $Format:Signature status: %G?$
+$Format:%GG$
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 *.a
 *.o
 *.mod
+*.smod
 *~

 # header files generated
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -430,7 +430,11 @@ endif()
 #---------------------------------------

 # Link order, left to right:
+<<<<<<< HEAD
 # cbind.a, util.a krylov.a prec.a base.a
+=======
+# cbind.a, util.a linsolve.a prec.a base.a
+>>>>>>> cmake2

 include(${CMAKE_CURRENT_LIST_DIR}/base/CMakeLists.txt)

@ -515,6 +519,7 @@ target_include_directories(prec PUBLIC
  $<INSTALL_INTERFACE:modules>)
 target_link_libraries(prec PUBLIC base)

+<<<<<<< HEAD
 include(${CMAKE_CURRENT_LIST_DIR}/krylov/CMakeLists.txt)
 add_library(krylov ${krylov_source_files})
 set_target_properties(krylov
@ -528,6 +533,21 @@ target_include_directories(krylov PUBLIC
  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
  $<INSTALL_INTERFACE:modules>)
 target_link_libraries(krylov PUBLIC base prec)
+=======
+include(${CMAKE_CURRENT_LIST_DIR}/linsolve/CMakeLists.txt)
+add_library(linsolve ${linsolve_source_files})
+set_target_properties(linsolve
+  PROPERTIES
+  Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/modules"
+  POSITION_INDEPENDENT_CODE TRUE
+  OUTPUT_NAME psb_linsolve
+  LINKER_LANGUAGE Fortran
+  )
+target_include_directories(linsolve PUBLIC
+  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
+  $<INSTALL_INTERFACE:modules>)
+target_link_libraries(linsolve PUBLIC base prec)
+>>>>>>> cmake2


 include(${CMAKE_CURRENT_LIST_DIR}/ext/CMakeLists.txt)
@ -644,7 +664,11 @@ target_include_directories(cbind PUBLIC
  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>  # Path for building
  $<INSTALL_INTERFACE:modules>  # Path for installation
 )
+<<<<<<< HEAD
 target_link_libraries(cbind PUBLIC base prec krylov ext util)
+=======
+target_link_libraries(cbind PUBLIC base prec linsolve ext util)
+>>>>>>> cmake2


 # Custom command to copy all header files
@ -706,13 +730,21 @@ if(MPI_FOUND)
  endif()
    
    
+<<<<<<< HEAD
  foreach(lib base prec krylov ext util cbind)
+=======
+  foreach(lib base prec linsolve ext util cbind)
+>>>>>>> cmake2
    target_link_libraries(${lib} PUBLIC ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES})
  endforeach()
 endif()

 if(OpenCoarrays_FOUND)
+<<<<<<< HEAD
  foreach(lib base prec krylov ext util cbind)  #TODO: check if cbind goes here!
+=======
+  foreach(lib base prec linsolve ext util cbind)  #TODO: check if cbind goes here!
+>>>>>>> cmake2
    target_link_libraries(${lib} PUBLIC OpenCoarrays::caf_mpi_static)
  endforeach()
 endif()
@ -731,7 +763,11 @@ install(DIRECTORY "${CMAKE_BINARY_DIR}/include" DESTINATION "include"
 install(DIRECTORY "${CMAKE_BINARY_DIR}/modules" DESTINATION "modules"
  FILES_MATCHING PATTERN "*.mod")

+<<<<<<< HEAD
 install(TARGETS base prec krylov ext util cbind
+=======
+install(TARGETS base prec linsolve ext util cbind
+>>>>>>> cmake2
  EXPORT ${CMAKE_PROJECT_NAME}-targets
  DESTINATION "${CMAKE_INSTALL_LIBDIR}"
  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
--- a/Make.inc.in
+++ b/Make.inc.in
@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@

 CBINDLIBNAME=libpsb_cbind.a

+OACCD=@OACCD@
+OACCLD=@OACCLD@
+FCOPENACC=@FCOPENACC@
+CCOPENACC=@CCOPENACC@
+CXXOPENACC=@CXXOPENACC@	
+
 CUDAD=@CUDAD@
 CUDALD=@CUDALD@
 LCUDA=@LCUDA@
@ -74,14 +80,18 @@ LCUDA=@LCUDA@
 SPGPU_LIBS=@SPGPU_LIBS@

 CUDA_DIR=@CUDA_DIR@
-CUDA_DEFINES=@CUDA_DEFINES@
 CUDA_INCLUDES=@CUDA_INCLUDES@
 CUDA_LIBS=@CUDA_LIBS@
 CUDA_VERSION=@CUDA_VERSION@
 CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
+CUDA_DEFINES=@CUDA_DEFINES@
+FCUDEFINES=@FCUDEFINES@
+CCUDEFINES=@CCUDEFINES@
+CXXCUDEFINES=@CXXCUDEFINES@
 NVCC=@CUDA_NVCC@
 CUDEFINES=@CUDEFINES@

+
 .SUFFIXES: .cu
 .cu.o:
 	$(NVCC)   $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<
--- a/Makefile
+++ b/Makefile
@ -1,6 +1,6 @@
 include Make.inc

-all: dirs based precd kryld utild cbindd extd  $(CUDAD) libd
+all: dirs based precd linslvd utild cbindd extd  $(CUDAD) $(OACCD) libd
 	@echo "====================================="
 	@echo "PSBLAS libraries Compilation Successful."

@ -11,28 +11,31 @@ dirs:

 precd: based
 utild: based	
-kryld: precd 
+linslvd: precd 
 extd:  based
 cudad:  extd
-cbindd: based precd kryld utild 
+oaccd:  extd	
+cbindd: based precd linslvd utild 

-libd: based precd kryld utild cbindd extd $(CUDALD)
+libd: based precd linslvd utild cbindd extd $(CUDALD) $(OACCLD)
 	$(MAKE) -C base lib
 	$(MAKE) -C prec lib
-	$(MAKE) -C krylov lib
+	$(MAKE) -C linsolve lib
 	$(MAKE) -C util lib 
 	$(MAKE) -C cbind lib
 	$(MAKE) -C ext lib
 cudald:  cudad
 	$(MAKE) -C cuda lib
+oaccld:  oaccd
+	$(MAKE) -C openacc lib


 based:
 	$(MAKE) -C base objs
 precd:
 	$(MAKE) -C prec objs
-kryld:
-	$(MAKE) -C krylov objs
+linslvd:
+	$(MAKE) -C linsolve objs
 utild:
 	$(MAKE) -C util objs 
 cbindd:
@ -41,6 +44,8 @@ extd:   based
 	$(MAKE) -C ext objs
 cudad:   based extd
 	$(MAKE) -C cuda objs
+oaccd:   based extd
+	$(MAKE) -C openacc objs 


 install: all
@ -62,11 +67,12 @@ install: all
 clean: 
 	$(MAKE) -C base clean
 	$(MAKE) -C prec clean 
-	$(MAKE) -C krylov clean
+	$(MAKE) -C linsolve clean
 	$(MAKE) -C util clean
 	$(MAKE) -C cbind clean
 	$(MAKE) -C ext clean
 	$(MAKE) -C cuda clean
+	$(MAKE) -C openacc clean

 check: all
 	make check -C test/serial
@ -79,11 +85,12 @@ cleanlib:
 veryclean: cleanlib
 	cd base && $(MAKE) veryclean
 	cd prec && $(MAKE) veryclean 
-	cd krylov && $(MAKE) veryclean
+	cd linsolve && $(MAKE) veryclean
 	cd util && $(MAKE) veryclean
 	cd cbind && $(MAKE) veryclean
 	cd ext && $(MAKE) veryclean
 	cd cuda && $(MAKE) veryclean
+	cd openacc && $(MAKE) veryclean
 	cd test/fileread && $(MAKE) clean
 	cd test/pargen && $(MAKE) clean
 	cd test/util && $(MAKE) clean
--- a/README.md
+++ b/README.md
@ -1,37 +1,42 @@
-PSBLAS library, version 3.9
-===========================
+# PSBLAS library, version 3.9

-The architecture of the Fortran 2003 sparse BLAS is described in:
+
+The PSBLAS library, developed with the aim to facilitate the parallelization of computationally intensive scientific applications, is designed to address parallel implementation of iterative solvers for sparse linear systems through the distributed memory paradigm. It includes routines for multiplying sparse matrices by dense matrices, solving block diagonal systems with triangular diagonal entries, preprocessing sparse matrices, and contains additional routines for dense matrix operations. The current implementation of PSBLAS addresses a distributed memory execution model operating with message passing.
+
+The PSBLAS library version 3 is implemented in the Fortran 2008 programming language, with reuse and/or adaptation of existing Fortran 77 and Fortran 95 software, plus a handful of C routines. 
+
+## References
+
+
+The architecture, philosophy and implementation details of the library are contained in the following papers:
+
+- The architecture of the Fortran 2003 sparse BLAS is described in:
  >S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix
  >Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No.
  4, 2012.

-The ideas are explored further with the paper:
+- The ideas are explored further with the paper:
  >V. Cardellini, S. Filippone and D. Rouson. Design Patterns for
  >sparse-matrix computations on hybrid CPU/GPU platforms, Scientific 
  >Programming, 22(2014), pp.1-19.

-Version 1.0 of the library is described in:
+- Version 1.0 of the library is described in:
  >S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear
  >algebra computation on sparse matrices, ACM Trans. on Math. Software,
  >26(4), Dec. 2000, pp. 527-550.
+- The software infrastructure changes required to accommodate the implementation of the
+  Additive-Schwarz preconditioners available in [AMG4PSBLAS](https://github.com/sfilippone/amg4psblas/) are detailed in:
+  > A. Buttari, P. D'Ambra, D. di Serafino, S. Filippone, Extending PSBLAS to build parallel Schwarz preconditioners, Applied Parallel Computing. State of the Art in Scientific Computing: 7th International Workshop, PARA 2004, LNCS 3732, 2006, pp. 593-602.
  
+  > A. Buttari,  P. D'Ambra, D. Di Serafino, S. Filippone, 2LEV-D2P4: A package of high-performance preconditioners for scientific and engineering applications, Applicable Algebra in Engineering, Communications and Computing, 2007, 18(3), pp. 223-239.
  
-UTILITIES
---------
-The `test/util` directory contains some utilities to convert to/from
-Harwell-Boeing and MatrixMarket file formats.
+  > P. D'Ambra, D. Di Serafino, S. Filippone, MLD2P4: A package of parallel algebraic multilevel domain decomposition preconditioners in Fortran 95, ACM Transactions on Mathematical Software, 2010, 37(3), 30.

+PSBLAS is the backbone of the Parallel Sparse Computation Toolkit ([PSCToolkit](https://psctoolkit.github.io/)) suite of libraries. See the paper:
+> D’Ambra, P., Durastante, F., & Filippone, S. (2023). Parallel Sparse Computation Toolkit. Software Impacts, 15, 100463.

-DOCUMENTATION
-------------
-See docs/psblas-3.9.pdf; an HTML version of the same document is
-available in docs/html. Please consult the sample programs, especially
-test/pargen/psb_[sd]_pde[23]d.f90
+### Other Software credits 

-
-OTHER SOFTWARE CREDITS 
----------------------
 We originally included a modified implementation of some of the Sparker
 (serial sparse BLAS)  material; this has been completely rewritten, way
 beyond the intention(s) and responsibilities of the original developers.
@ -40,18 +45,8 @@ The main reference for the serial sparse BLAS is:
 >linear algebra subprograms for sparse matrices: a user level interface,
 >ACM Trans. Math. Softw., 23(3), 379-401, 1997.

-CUDA and GPU support
--------------------
-This version of PSBLAS incorporates into a single package three
-entities that were previouslty separated:
-1. PSBLAS     -- the base library
-2. PSBLAS-EXT -- a library providing additional storage formats
-3. SPGPU      -- a package of kernels for NVIDIA GPUs originally
-   	      	 written by Davide Barbieri and Salvatore Filippone;
-		 see the license file cuda/License-spgpu.md
-
-INSTALLING
----------
+## Installing
+
 To compile and run our software you will need the following
 prerequisites (see also SERIAL below):

@ -71,7 +66,8 @@ prerequisites (see also SERIAL below):
   We use the C interface to AMD.

 5. If you have CUDA available, use
-   --with-cuda=<path>      to specify the CUDA toolkit location
+   --enable-cuda           to compile CUDA-enabled methods
+   --with-cudadir=<path>   to specify the CUDA toolkit location
   --with-cudacc=XX,YY,ZZ  to specify a list of target CCs (compute
   			   capabilities) to compile the CUDA code for.

@ -79,21 +75,27 @@ The configure script will generate a Make.inc file suitable for building
 the library. The script is capable of recognizing the needed libraries
 with their default names; if they are in unusual places consider adding
 the paths with `--with-libs`, or explicitly specifying the names in
-`--with-blas`, etc. Please note that a common way for the configure script
-to fail is to specify inconsistent MPI vs. plain compilers, either
-directly or indirectly via environment variables; e.g. specifying the
-Intel compiler with `FC=ifort` while at the same time having an 
-`MPIFC=mpif90` which points to GNU Fortran. The best way to avoid this
-situation is (in our opinion) to use the environment modules package
-(see http://modules.sourceforge.net/), and load the relevant
-variables with (e.g.) 
-```
-module load gnu46 openmpi
-```
-This will delegate to the modules setup to make sure that the version of
-openmpi in use is the one compiled with the gnu46 compilers. After the
-configure script has completed you can always tweak the Make.inc file
-yourself. 
+`--with-blas`, etc. 
+
+>[!CAUTION]
+> Please note that a common way for the configure script
+> to fail is to specify inconsistent MPI vs. plain compilers, either
+> directly or indirectly via environment variables; e.g. specifying the
+> Intel compiler with `FC=ifort` while at the same time having an 
+> `MPIFC=mpif90` which points to GNU Fortran. 
+
+>[!TIP]
+> The best way to avoid this
+> situation is (in our opinion) to use the environment modules package
+> (see [http://modules.sourceforge.net/](http://modules.sourceforge.net/)), and load the relevant
+> variables with (e.g.) 
+> ```
+> module load gcc/13.2.0 openmpi/4.1.6
+> ```
+> This will delegate to the modules setup to make sure that the version of
+> openmpi in use is the one compiled with the loaded gcc compilers. After the
+> configure script has completed you can always tweak the Make.inc file
+> yourself.

 After you have Make.inc fixed,  run 
 ```
@ -105,58 +107,117 @@ install and the libraries will be installed under `/path/lib`, while the
 module files will be installed under `/path/modules`. The regular and
 experimental C interface header files are under `/path/include`.

-SERIAL
------
+### CUDA and GPU support
+
+This version of PSBLAS incorporates into a single package three
+entities that were previously separated:
+| Library |                    |
+|---------|--------------------|
+| PSBLAS  | the base library   |
+| PSBLAS-EXT | a library providing additional storage formats for matrices and vectors |
+| SPGPU      | a package of kernels for NVIDIA GPUs originally written by Davide Barbieri and Salvatore Filippone; see the license file [cuda/License-spgpu.md](cuda/License-spgpu.md) |
+
+### OpenACC
+There is a highly experimental version of an OpenACC interface,
+you can access it by specifying
+```bash
+--enable-openacc  --with-extraopenacc="-foffload=nvptx-none=-march=sm_70"
+```
+where the argument to the extraopenacc option depends on the compiler
+you are using (the example shown here is relevant for the GNU
+compiler). 
+
+### Serial
+
 Configuring with `--enable-serial` will provide a fake MPI stub library
 that enables running in pure serial mode; no MPI installation is needed
 in this case (but note that the fake MPI stubs are only guaranteed to
 cover what we use internally, it's not a complete replacement). 

-INTEGER SIZES
-------------
+### Integers
+
 We have two kinds of integers: IPK for local indices, and LPK for
 global indices. They can be specified independently at configure time,
 e.g.
+```bash
 --with-ipk=4 --with-lpk=8
+```
 which is asking for 4-bytes local indices, and 8-bytes global indices
 (this is the default). 


+## Documentation

-TODO
----
-Fix all reamining bugs. Bugs? We dont' have any ! ;-)
+Further information on installation and configuration can be found in the documentation.
+See [docs/psblas-3.9.pdf](docs/psblas-3.9.pdf); an HTML version of the same document is
+available in docs/html. Please consult the sample programs, especially
+- [test/pargen/psb_s_pde2d.F90](test/pargen/psb_s_pde2d.F90) [test/pargen/psb_d_pde2d.F90](test/pargen/psb_d_pde2d.F90)
+- [test/pargen/psb_s_pde3d.F90](test/pargen/psb_s_pde3d.F90) [test/pargen/psb_d_pde3d.F90](test/pargen/psb_d_pde3d.F90)
+
+which contain examples for the solution of linear systems obtained by the discretization of a generic second-order differential equation in two:
+```math
+- a_1 \frac{\partial^2 u}{\partial x^2} 
+- a_2 \frac{\partial^2 u}{\partial y^2} 
+ b_1 \frac{\partial u}{\partial x} 
+ b_2 \frac{\partial u}{\partial y} 
+ c u = f
+```
+or three
+```math
+- a_1 \frac{\partial^2 u}{\partial x^2} 
+- a_2 \frac{\partial^2 u}{\partial y^2} 
+- a_3 \frac{\partial^2 u}{\partial z^2} 
+ b_1 \frac{\partial u}{\partial x} 
+ b_2 \frac{\partial u}{\partial y} 
+ b_3 \frac{\partial u}{\partial z} 
+ c u = f
+```
+dimensions on the unit square/cube with Dirichlet boundary conditions.

+### Utilities

-The PSBLAS team. 
---------------
-Project lead:
-Salvatore Filippone
+The [test/util](test/util) directory contains some utilities to convert to/from
+Harwell-Boeing and MatrixMarket file formats.

-Contributors (roughly reverse cronological order):
+## TODO and bugs

-Dimitri    Walther
-Andea      Di Iorio
-Stefano    Petrilli
-Soren 	   Rasmussen
-Zaak       Beekman
-Ambra	   Abdullahi Hassan
-Pasqua	   D'Ambra
-Alfredo    Buttari
-Daniela    di Serafino
-Michele    Martone
-Michele    Colajanni
-Fabio      Cerioni
-Stefano    Maiolatesi
-Dario      Pascucci
+- [ ] Improving OpenACC support
+- [ ] Improving OpenMP support
+- [X] Fix all remaining bugs. Bugs? We don't have any! 🤓

+> [!NOTE]
+> To report bugs 🐛 or issues ❓ please use the [GitHub issue system](https://github.com/sfilippone/psblas3/issues).


-RELATED SOFTWARE
----------------
+
+## The PSBLAS team. 
+**Project lead:**
+Salvatore Filippone
+
+**Contributors** (_roughly reverse chronological order_):
+
+- Theophane  Loloum
+- Fabio      Durastante
+- Dimitri    Walther
+- Andea      Di Iorio
+- Stefano    Petrilli
+- Soren 	   Rasmussen
+- Zaak       Beekman
+- Ambra	   Abdullahi Hassan
+- Pasqua	   D'Ambra
+- Alfredo    Buttari
+- Daniela    di Serafino
+- Michele    Martone
+- Michele    Colajanni
+- Fabio      Cerioni
+- Stefano    Maiolatesi
+- Dario      Pascucci
+
+
+## RELATED SOFTWARE
 If you are looking for more sophisticated preconditioners, you may be
 interested in the package AMG4PSBLAS from
-<http://github.com/sfilippone/amg4psblas>
+<http://github.com/sfilippone/amg4psblas> and the whole [PSCToolkit suite](https://psctoolkit.github.io/).


 Contact: <https://github.com/sfilippone/psblas3>
--- a/base/modules/Makefile
+++ b/base/modules/Makefile
@ -10,13 +10,14 @@ BASIC_MODS= psb_const_mod.o psb_cbind_const_mod.o psb_error_mod.o psb_realloc_mo
 	auxil/psb_z_realloc_mod.o

 COMMINT= penv/psi_penv_mod.o \
-	 penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o \
+	 penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_i2_p2p_mod.o \
 	 penv/psi_e_p2p_mod.o \
 	 penv/psi_s_p2p_mod.o \
 	 penv/psi_d_p2p_mod.o \
 	 penv/psi_c_p2p_mod.o \
 	 penv/psi_z_p2p_mod.o \
 	 penv/psi_collective_mod.o \
+	 penv/psi_i2_collective_mod.o \
 	 penv/psi_e_collective_mod.o \
 	 penv/psi_m_collective_mod.o \
 	 penv/psi_s_collective_mod.o \
@ -149,23 +150,25 @@ psb_realloc_mod.o: auxil/psb_m_realloc_mod.o \
 	auxil/psb_c_realloc_mod.o \
 	auxil/psb_z_realloc_mod.o

-penv/psi_p2p_mod.o: penv/psi_m_p2p_mod.o \
+penv/psi_p2p_mod.o:  penv/psi_i2_p2p_mod.o \
+	penv/psi_m_p2p_mod.o \
 	penv/psi_e_p2p_mod.o \
 	penv/psi_s_p2p_mod.o \
 	penv/psi_d_p2p_mod.o \
 	penv/psi_c_p2p_mod.o \
 	penv/psi_z_p2p_mod.o
-penv/psi_collective_mod.o: penv/psi_e_collective_mod.o \
+penv/psi_collective_mod.o: penv/psi_i2_collective_mod.o \
+	penv/psi_e_collective_mod.o \
 	penv/psi_m_collective_mod.o \
 	penv/psi_s_collective_mod.o \
 	penv/psi_d_collective_mod.o \
 	penv/psi_c_collective_mod.o \
 	penv/psi_z_collective_mod.o 

-penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o  penv/psi_s_p2p_mod.o \
+penv/psi_i2_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o  penv/psi_s_p2p_mod.o \
 penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o  penv/psi_z_p2p_mod.o: penv/psi_penv_mod.o 

-penv/psi_e_collective_mod.o  penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
+penv/psi_i2_collective_mod.o  penv/psi_e_collective_mod.o  penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
 penv/psi_d_collective_mod.o  penv/psi_c_collective_mod.o penv/psi_z_collective_mod.o: penv/psi_penv_mod.o \
 	penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o  penv/psi_s_p2p_mod.o \
 	penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o  penv/psi_z_p2p_mod.o
--- a/base/modules/desc/psb_desc_const_mod.f90
+++ b/base/modules/desc/psb_desc_const_mod.f90
@ -35,7 +35,7 @@
 !    Auxiliary module for descriptor: constant values. 
 !
 module psb_desc_const_mod
-  use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
+  use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_
  !
  !     Communication, prolongation & restriction
  !
--- a/base/modules/penv/psi_penv_mod.F90
+++ b/base/modules/penv/psi_penv_mod.F90
@ -167,6 +167,7 @@ module psi_penv_mod

 #else 

+  integer(psb_mpk_), save :: mpi_i2amx_op, mpi_i2amn_op
  integer(psb_mpk_), save :: mpi_iamx_op, mpi_iamn_op
  integer(psb_mpk_), save :: mpi_mamx_op, mpi_mamn_op
  integer(psb_mpk_), save :: mpi_eamx_op, mpi_eamn_op
@ -181,6 +182,7 @@ module psi_penv_mod
 #endif

  private :: psi_get_sizes,  psi_register_mpi_extras
+  private :: psi_i2amx_op, psi_i2amn_op
  private :: psi_iamx_op, psi_iamn_op 
  private :: psi_mamx_op, psi_mamn_op 
  private :: psi_eamx_op, psi_eamn_op 
@ -773,6 +775,8 @@ contains

 #if defined(SERIAL_MPI)
 #else 
+    if (info == 0) call mpi_op_create(psi_i2amx_op,.true.,mpi_i2amx_op,info)
+    if (info == 0) call mpi_op_create(psi_i2amn_op,.true.,mpi_i2amn_op,info)
    if (info == 0) call mpi_op_create(psi_mamx_op,.true.,mpi_mamx_op,info)
    if (info == 0) call mpi_op_create(psi_mamn_op,.true.,mpi_mamn_op,info)
    if (info == 0) call mpi_op_create(psi_eamx_op,.true.,mpi_eamx_op,info)
@ -997,6 +1001,8 @@ contains
           & call mpi_comm_Free(ctxt%ctxt,info)
    end if
    if (close_) then 
+      if (info == 0) call mpi_op_free(mpi_i2amx_op,info)
+      if (info == 0) call mpi_op_free(mpi_i2amn_op,info)
      if (info == 0) call mpi_op_free(mpi_mamx_op,info)
      if (info == 0) call mpi_op_free(mpi_mamn_op,info)
      if (info == 0) call mpi_op_free(mpi_eamx_op,info)
@ -1188,6 +1194,26 @@ contains
  ! Note: len & type are always default integer.
  !
  ! !!!!!!!!!!!!!!!!!!!!!!
+  subroutine psi_i2amx_op(inv, outv,len,type) 
+    integer(psb_i2pk_) :: inv(len), outv(len)
+    integer(psb_mpk_) :: len,type
+    integer(psb_mpk_) :: i
+
+    do i=1, len
+      if (abs(inv(i)) > abs(outv(i))) outv(i) = inv(i)
+    end do
+  end subroutine psi_i2amx_op
+  
+  subroutine psi_i2amn_op(inv, outv,len,type) 
+    integer(psb_i2pk_) :: inv(len), outv(len)
+    integer(psb_mpk_) :: len,type
+    integer(psb_mpk_) :: i
+
+    do i=1, len
+      if (abs(inv(i)) < abs(outv(i))) outv(i) = inv(i)
+    end do
+  end subroutine psi_i2amn_op
+
  subroutine psi_mamx_op(inv, outv,len,type) 
    integer(psb_mpk_) :: inv(len), outv(len)
    integer(psb_mpk_) :: len,type
--- a/base/serial/impl/psb_c_coo_impl.F90
+++ b/base/serial/impl/psb_c_coo_impl.F90
@ -600,7 +600,8 @@ subroutine  psb_c_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= czero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine  psb_lc_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= czero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_c_csc_impl.F90
+++ b/base/serial/impl/psb_c_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_c_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= czero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4322,7 +4323,8 @@ subroutine  psb_lc_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= czero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_c_csr_impl.F90
+++ b/base/serial/impl/psb_c_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_c_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= czero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6561,7 +6562,8 @@ subroutine  psb_lc_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= czero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/impl/psb_d_coo_impl.F90
+++ b/base/serial/impl/psb_d_coo_impl.F90
@ -600,7 +600,8 @@ subroutine  psb_d_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= dzero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine  psb_ld_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= dzero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_d_csc_impl.F90
+++ b/base/serial/impl/psb_d_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_d_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4322,7 +4323,8 @@ subroutine  psb_ld_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_d_csr_impl.F90
+++ b/base/serial/impl/psb_d_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_d_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6561,7 +6562,8 @@ subroutine  psb_ld_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= dzero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/impl/psb_s_coo_impl.F90
+++ b/base/serial/impl/psb_s_coo_impl.F90
@ -600,7 +600,8 @@ subroutine  psb_s_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= szero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine  psb_ls_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= szero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_s_csc_impl.F90
+++ b/base/serial/impl/psb_s_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_s_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= szero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4322,7 +4323,8 @@ subroutine  psb_ls_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= szero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_s_csr_impl.F90
+++ b/base/serial/impl/psb_s_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_s_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= szero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6561,7 +6562,8 @@ subroutine  psb_ls_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= szero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/impl/psb_z_coo_impl.F90
+++ b/base/serial/impl/psb_z_coo_impl.F90
@ -600,7 +600,8 @@ subroutine  psb_z_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= zzero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5931,7 +5932,8 @@ subroutine  psb_lz_coo_clean_zeros(a, info)
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= zzero) then
+    ! Always keep the diagonal, even if numerically zero
+    if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_z_csc_impl.F90
+++ b/base/serial/impl/psb_z_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_z_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4322,7 +4323,8 @@ subroutine  psb_lz_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_z_csr_impl.F90
+++ b/base/serial/impl/psb_z_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_z_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6561,7 +6562,8 @@ subroutine  psb_lz_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
+      if ((a%val(k) /= zzero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/psb_cgeprt.f90
+++ b/base/serial/psb_cgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_scsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:

@ -131,6 +131,7 @@ subroutine psb_cgeprt2(iout,a,head)
  ncol = size(a,2) 
  write(iout,*) nrow,ncol

+
  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

  do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_cgeprt1(iout,a,head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol

  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

--- a/base/serial/psb_dgeprt.f90
+++ b/base/serial/psb_dgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_dcsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:

@ -124,13 +124,14 @@ subroutine psb_dgeprt2(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol

-  write(iout,'(a)') '%%MatrixMarket matrix array real general'
+  write(iout,'(a)') '%%MatrixMarket matrix array real general'   ! one real value per entry
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = size(a,2) 
  write(iout,*) nrow,ncol

+
  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

  do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_dgeprt1(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol

-  write(iout,'(a)') '%%MatrixMarket matrix array real general'
+  write(iout,'(a)') '%%MatrixMarket matrix array real general'   ! one real value per entry
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol

  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

--- a/base/serial/psb_sgeprt.f90
+++ b/base/serial/psb_sgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_scsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:

@ -124,13 +124,14 @@ subroutine psb_sgeprt2(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol

-  write(iout,'(a)') '%%MatrixMarket matrix array real general'
+  write(iout,'(a)') '%%MatrixMarket matrix array real general'   ! one real value per entry
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = size(a,2) 
  write(iout,*) nrow,ncol

+
  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

  do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_sgeprt1(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol

-  write(iout,'(a)') '%%MatrixMarket matrix array real general'
+  write(iout,'(a)') '%%MatrixMarket matrix array real general'   ! one real value per entry
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol

  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'

--- a/base/serial/psb_zgeprt.f90
+++ b/base/serial/psb_zgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_scsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:

@ -131,6 +131,7 @@ subroutine psb_zgeprt2(iout,a,head)
  ncol = size(a,2) 
  write(iout,*) nrow,ncol

+
  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

  do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_zgeprt1(iout,a,head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol

  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'

--- a/cbind/krylov/psb_ckrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_ckrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/krylov/psb_dkrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_dkrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/krylov/psb_skrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_skrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/krylov/psb_zkrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_zkrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/test/pargen/Makefile
+++ b/cbind/test/pargen/Makefile
@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR)
 CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR)

 PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind
-PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR)
+PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR)

 #
 # Compilers and such
--- a/config/ax_c_openacc.m4
+++ b/config/ax_c_openacc.m4
@ -0,0 +1,104 @@
+# AX_C_OPENACC
+# ------------
+# Check which options need to be passed to the C compiler to support Openacc.
+# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
+# options.
+# The options are necessary at compile time (so the #pragmas are understood)
+# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $CC $CFLAGS already
+# supports Openacc.
+#
+# For each candidate option, we do a compile test first, then a link test;
+# if the compile test succeeds but the link test fails, that means we have
+# found the correct option but it doesn't work because the libraries are
+# broken.  (This can happen, for instance, with SunPRO C and a bad combination
+# of operating system patches.)
+#
+# Several of the options in our candidate list can be misinterpreted by
+# compilers that don't use them to activate Openacc support; for example,
+# many compilers understand "-openacc" to mean "write output to a file
+# named 'penmp'" rather than "enable Openacc".  We can't completely avoid
+# the possibility of clobbering files named 'penmp' or 'mp' in configure's
+# working directory; therefore, this macro will bomb out if any such file
+# already exists when it's invoked.
+AC_DEFUN([AX_C_OPENACC],
+[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
+[AC_ARG_ENABLE([openacc],
+   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
+[
+  OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
+  if test "$enable_openacc" != no; then
+      AC_LANG_PUSH([C])
+      AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
+      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
+      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
+      dnl Probe the candidate flags in order and stop at the first one that
+      dnl changes the cache variable:
+      dnl   ''          OpenACC already enabled by the current flags
+      dnl   -fopenacc   GCC
+      dnl   -openacc    (kept from the original candidate list)
+      dnl   -acc        NVIDIA HPC SDK / PGI
+      dnl Outcome per candidate:
+      dnl   compile fails           -> value stays 'not found', try next flag
+      dnl   compile ok, link fails  -> 'unsupported', stop probing
+      dnl   compile ok, link ok     -> the flag itself, stop probing
+      dnl The same cache variable name must be used for reads and writes.
+      for ac_option in '' -fopenacc -openacc -acc; do
+
+        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
+        AC_COMPILE_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+  int main (void) { acc_init (0); return 0;}
+],
+          [AC_LINK_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+ int main (void) { acc_init (0); return 0;}
+],
+            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
+            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
+        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
+
+        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
+          break
+        fi
+      done
+      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
+        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
+      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
+        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
+      fi
+      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
+      dnl started probing for Openacc support, so if they exist now, they were
+      dnl created by the probe loop and it's safe to delete them.
+      rm -f penmp mp])
+    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
+       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
+      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
+    fi
+   AC_LANG_POP([C])
+  fi
+])
+
+# _AX_OPENACC_SAFE_WD
+# -------------------
+# AC_REQUIREd by AX_C_OPENACC.  Checks both at autoconf time (m4_syscmd) and
+# at configure time (shell test) that no file named 'penmp' or 'mp' exists.
+AC_DEFUN([_AX_OPENACC_SAFE_WD],
+[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
+[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
+  [AX_OPENACC clobbers files named 'mp' and 'penmp'.
+   To use AX_OPENACC you must not have either of these files
+   at the top level of your source tree.]))])]dnl
+[if test -e penmp || test -e mp; then
+  AC_MSG_ERROR(m4_normalize(
+    [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
+     Aborting configure because one of these files already exists.]))
+fi])
+
--- a/config/ax_cxx_openacc.m4
+++ b/config/ax_cxx_openacc.m4
@ -0,0 +1,104 @@
+# AX_CXX_OPENACC
+# --------------
+# Check which options need to be passed to the C++ compiler to support Openacc.
+# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
+# options.
+# The options are necessary at compile time (so the #pragmas are understood)
+# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $CC $CFLAGS already
+# supports Openacc.
+#
+# For each candidate option, we do a compile test first, then a link test;
+# if the compile test succeeds but the link test fails, that means we have
+# found the correct option but it doesn't work because the libraries are
+# broken.  (This can happen, for instance, with SunPRO C and a bad combination
+# of operating system patches.)
+#
+# Several of the options in our candidate list can be misinterpreted by
+# compilers that don't use them to activate Openacc support; for example,
+# many compilers understand "-openacc" to mean "write output to a file
+# named 'penmp'" rather than "enable Openacc".  We can't completely avoid
+# the possibility of clobbering files named 'penmp' or 'mp' in configure's
+# working directory; therefore, this macro will bomb out if any such file
+# already exists when it's invoked.
+AC_DEFUN([AX_CXX_OPENACC],
+[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
+[AC_ARG_ENABLE([openacc],
+   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
+[
+  OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
+  if test "$enable_openacc" != no; then
+      AC_LANG_PUSH([C++])
+      AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
+      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
+      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
+      dnl Probe the candidate flags in order and stop at the first one that
+      dnl changes the cache variable:
+      dnl   ''          OpenACC already enabled by the current flags
+      dnl   -fopenacc   GCC
+      dnl   -openacc    (kept from the original candidate list)
+      dnl   -acc        NVIDIA HPC SDK / PGI
+      dnl Outcome per candidate:
+      dnl   compile fails           -> value stays 'not found', try next flag
+      dnl   compile ok, link fails  -> 'unsupported', stop probing
+      dnl   compile ok, link ok     -> the flag itself, stop probing
+      dnl The same cache variable name must be used for reads and writes.
+      for ac_option in '' -fopenacc -openacc -acc; do
+
+        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
+        AC_COMPILE_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+],
+          [AC_LINK_IFELSE([
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+],
+            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
+            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
+        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
+
+        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
+          break
+        fi
+      done
+      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
+        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
+      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
+        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
+      fi
+      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
+      dnl started probing for Openacc support, so if they exist now, they were
+      dnl created by the probe loop and it's safe to delete them.
+      rm -f penmp mp])
+    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
+       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
+      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
+    fi
+   AC_LANG_POP([C++])
+  fi
+])
+
+dnl _AC_OPENACC_SAFE_WD
+dnl ------------------
+dnl AC_REQUIREd by AC_OPENACC.  Checks both at autoconf time and at
+dnl configure time for files that AC_OPENACC clobbers.
+dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
+dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
+dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
+dnl   [AX_OPENACC clobbers files named 'mp' and 'penmp'.
+dnl    To use AX_OPENACC you must not have either of these files
+dnl    at the top level of your source tree.]))])]dnl
+dnl [if test -e penmp || test -e mp; then
+dnl   AC_MSG_ERROR(m4_normalize(
+dnl     [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
+dnl      Aborting configure because one of these files already exists.]))
+dnl fi])
+
--- a/config/ax_fc_openacc.m4
+++ b/config/ax_fc_openacc.m4
@ -0,0 +1,108 @@
+# AX_FC_OPENACC
+# -------------
+# Check which options need to be passed to the Fortran compiler to support Openacc.
+# Set the OPENACC_CFLAGS / OPENACC_CXXFLAGS / OPENACC_FFLAGS variable to these
+# options.
+# The options are necessary at compile time (so the #pragmas are understood)
+# and at link time (so the appropriate library is linked with).
+# This macro takes care to not produce redundant options if $CC $CFLAGS already
+# supports Openacc.
+#
+# For each candidate option, we do a compile test first, then a link test;
+# if the compile test succeeds but the link test fails, that means we have
+# found the correct option but it doesn't work because the libraries are
+# broken.  (This can happen, for instance, with SunPRO C and a bad combination
+# of operating system patches.)
+#
+# Several of the options in our candidate list can be misinterpreted by
+# compilers that don't use them to activate Openacc support; for example,
+# many compilers understand "-openacc" to mean "write output to a file
+# named 'penmp'" rather than "enable Openacc".  We can't completely avoid
+# the possibility of clobbering files named 'penmp' or 'mp' in configure's
+# working directory; therefore, this macro will bomb out if any such file
+# already exists when it's invoked.
+AC_DEFUN([AX_FC_OPENACC],
+[AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
+[AC_ARG_ENABLE([openacc],
+   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
+[
+  OPENACC_[]_AC_LANG_PREFIX[]FLAGS=
+  if test "$enable_openacc" != no; then
+      AC_LANG_PUSH([Fortran])
+      AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
+      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
+      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
+      dnl Probe the candidate flags in order:
+      dnl   ''          nothing extra needed
+      dnl   -fopenacc   GCC (gfortran)
+      dnl   -openacc    (kept from the original candidate list)
+      dnl   -acc        NVIDIA HPC SDK / PGI
+      dnl Outcome per candidate:
+      dnl   compile fails           -> value stays 'not found'
+      dnl   compile ok, link fails  -> 'unsupported'
+      dnl   compile ok, link ok     -> the flag itself
+      dnl The test program has no _OPENACC guard, so both failure modes
+      dnl mean "try the next flag"; only a successful link ends the loop.
+      for ac_option in '' -fopenacc -openacc -acc; do
+
+        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
+        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
+        AC_COMPILE_IFELSE([
+      program main
+      use openacc
+      implicit none
+      integer tid, np
+      tid = 42
+      call acc_init(0)
+      end
+],
+          [AC_LINK_IFELSE([
+      program main
+      use openacc
+      implicit none
+      integer tid, np
+      tid = 42
+      call acc_init(0)
+      end
+],
+            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
+            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
+        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS
+
+        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found' && test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported'; then
+          break
+        fi
+      done
+      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
+        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
+      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
+        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
+      fi
+      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
+      dnl started probing for Openacc support, so if they exist now, they were
+      dnl created by the probe loop and it's safe to delete them.
+      rm -f penmp mp])
+    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
+       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
+      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
+    fi
+   AC_LANG_POP([Fortran])
+  fi
+])
+
+# _AX_OPENACC_SAFE_WD
+# -------------------
+# AC_REQUIREd by AX_FC_OPENACC.  Checks both at autoconf time (m4_syscmd) and
+# at configure time (shell test) that no file named 'penmp' or 'mp' exists.
+AC_DEFUN([_AX_OPENACC_SAFE_WD],
+[m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
+[m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
+  [AX_OPENACC clobbers files named 'mp' and 'penmp'.
+   To use AX_OPENACC you must not have either of these files
+   at the top level of your source tree.]))])]dnl
+[if test -e penmp || test -e mp; then
+  AC_MSG_ERROR(m4_normalize(
+    [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
+     Aborting configure because one of these files already exists.]))
+fi])
+
--- a/config/pac.m4
+++ b/config/pac.m4
@ -2157,6 +2157,31 @@ CPPFLAGS="$SAVE_CPPFLAGS"



+dnl @synopsis PAC_ARG_CUDA
+dnl
+dnl Test for --enable-cuda
+dnl 
+dnl 
+dnl
+dnl Example use:
+dnl
+dnl
+dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
+dnl
+AC_DEFUN([PAC_ARG_CUDA],
+[AC_MSG_CHECKING([whether we want cuda ])
+AC_ARG_ENABLE(cuda,
+AS_HELP_STRING([--enable-cuda], 
+[Specify whether to enable cuda. ]),
+[
+dnl Honour --disable-cuda / --enable-cuda=no; any other value enables cuda.
+if test "x$enableval" = "xno"; then pac_cv_cuda="no"; else pac_cv_cuda="yes"; fi
+],
+[pac_cv_cuda="no";]
+	     )
+AC_MSG_RESULT([$pac_cv_cuda])
+])
+

 dnl @synopsis PAC_CHECK_CUDA
 dnl
@ -2173,7 +2198,7 @@ dnl
 dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
 dnl
 AC_DEFUN(PAC_CHECK_CUDA,
-[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
+[AC_ARG_WITH(cudadir, AC_HELP_STRING([--with-cudadir=DIR], [Specify the CUDA install directory.]),
        [pac_cv_cuda_dir=$withval],
        [pac_cv_cuda_dir=''])

@ -2234,56 +2259,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo
 [pac_cv_cudacc=''])
 ])

-AC_DEFUN(PAC_ARG_WITH_LIBRSB,
-	 [SAVE_LIBS="$LIBS"
-	  SAVE_CPPFLAGS="$CPPFLAGS"
-
-	  AC_ARG_WITH(librsb,
-	  AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
- 	  --with-librsb=/opt/packages/librsb]),
-	  [pac_cv_librsb_dir=$withval],
-	  [pac_cv_librsb_dir=''])
-	  
-	  if test "x$pac_cv_librsb_dir" != "x"; then 
-	  LIBS="-L$pac_cv_librsb_dir $LIBS"
-	  RSB_INCLUDES="-I$pac_cv_librsb_dir"
-	  # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
-	  RSB_LIBDIR="-L$pac_cv_librsb_dir"
-	  fi
-	  #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
-	  AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
-			  [pac_rsb_header_ok=yes],
-			  [pac_rsb_header_ok=no; RSB_INCLUDES=""])
-	  
-	  if test "x$pac_rsb_header_ok" == "xyes" ; then 
-	  RSB_LIBS="-lrsb $RSB_LIBDIR"
-	  # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
-	  # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
-	  # AC_TRY_LINK_FUNC(spgpuCreate, 
-	  # 		   [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
-	  # 		   [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
-	  # AC_MSG_RESULT($pac_gpu_lib_ok)
-	  # if test "x$pac_cv_have_spgpu" == "xyes" ; then 
-	  # AC_MSG_NOTICE([Have found SPGPU])
-	  RSBLIBNAME="librsb.a";
-	  LIBRSB_DIR="$pac_cv_librsb_dir";
-	  # SPGPU_DEFINES="-DHAVE_SPGPU";
-	  LIBRSB_INCDIR="$LIBRSB_DIR";
-	  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
-	  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
-	  # CUDA_DIR="$pac_cv_cuda_dir";
-	  LIBRSB_DEFINES="-DHAVE_RSB";
-	  LRSB=-lpsb_rsb
-	  # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
-	  # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
-	  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
-	  CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
-	  fi
-#  fi
-LIBS="$SAVE_LIBS"
-CPPFLAGS="$SAVE_CPPFLAGS"
-])
-dnl

 dnl @synopsis PAC_CHECK_CUDA_VERSION
 dnl
@ -2336,3 +2311,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
 ])dnl 


+
+dnl @synopsis PAC_ARG_OPENACC
+dnl
+dnl Test for --enable-openacc
+dnl 
+dnl 
+dnl
+dnl Example use:
+dnl
+dnl
+dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
+dnl
+AC_DEFUN([PAC_ARG_OPENACC],
+[AC_MSG_CHECKING([whether we want openacc ])
+AC_ARG_ENABLE(openacc,
+AS_HELP_STRING([--enable-openacc], 
+[Specify whether to enable openacc. ]),
+[
+if test "x$enableval" = "xno"; then pac_cv_openacc="no"; else pac_cv_openacc="yes"; fi
+]
+dnl No action-if-not-given: pac_cv_openacc is left untouched here and is
+dnl normalised to "no" by the else branch below unless it is "yes".
+	     )
+if test x"$pac_cv_openacc" == x"yes" ; then
+   AC_MSG_RESULT([yes.])
+#   AC_LANG_PUSH([Fortran])
+#   AC_OPENACC() 
+#   pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
+#   AC_LANG_POP()
+#   AC_LANG_PUSH([C])
+#   AC_OPENACC() 
+#   pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
+#   AC_LANG_POP()
+#   AC_LANG_PUSH([C++])
+#   AC_OPENACC() 
+#   pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
+#   AC_LANG_POP()
+else
+ pac_cv_openacc="no";
+ AC_MSG_RESULT([no.])
+fi
+]
+)
+
--- a/530
+++ b/530
@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE
 am__EXEEXT_TRUE
 LTLIBOBJS
 LIBOBJS
+LRSB
+LIBRSB_DEFINES
+LIBRSB_DIR
+LIBRSB_INCDIR
+LIBRSB_INCLUDES
+LIBRSB_LIBS
 LCUDA
 CUDALD
 CUDAD
@ -662,11 +668,19 @@ CUDA_SHORT_VERSION
 CUDA_VERSION
 CUDA_LIBS
 CUDA_INCLUDES
+CXXCUDEFINES
+CCUDEFINES
+FCUDEFINES
 CUDA_DEFINES
 CUDA_DIR
 EXTRALDLIBS
 SPGPU_LIBS
 SPGPU_FLAGS
+CXXOPENACC
+CCOPENACC
+FCOPENACC
+OACCLD
+OACCD
 METISINCFILE
 UTILLIBNAME
 METHDLIBNAME
@ -838,8 +852,14 @@ with_amd
 with_amddir
 with_amdincdir
 with_amdlibdir
-with_cuda
+enable_cuda
+with_cudadir
 with_cudacc
+enable_openacc
+with_extraopenacc
+with_ccopenacc
+with_cxxopenacc
+with_fcopenacc
 '
      ac_precious_vars='build_alias
 host_alias
@ -1490,6 +1510,9 @@ Optional Features:
  --disable-silent-rules  verbose build output (undo: "make V=0")
  --enable-openmp         Specify whether to enable openmp.
  --disable-openmp        do not use OpenMP
+  --enable-cuda           Specify whether to enable cuda.
+  --enable-openacc        Specify whether to enable openacc.
+  --disable-openacc       do not use Openacc

 Optional Packages:
  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@ -1532,9 +1555,17 @@ Optional Packages:
  --with-amddir=DIR       Specify the directory for AMD library and includes.
  --with-amdincdir=DIR    Specify the directory for AMD includes.
  --with-amdlibdir=DIR    Specify the directory for AMD library.
-  --with-cuda=DIR         Specify the CUDA install directory.
+  --with-cudadir=DIR      Specify the CUDA install directory.
  --with-cudacc           A comma-separated list of CCs to compile to, for
                          example, --with-cudacc=50,60,70,75
+  --with-extraopenacc     additional [EXTRAOPENACC] flags to be added: will
+                          prepend to [EXTRAOPENACC]
+  --with-ccopenacc        additional [CCOPENACC] flags to be added: will
+                          prepend to [CCOPENACC]
+  --with-cxxopenacc       additional [CXXOPENACC] flags to be added: will
+                          prepend to [CXXOPENACC]
+  --with-fcopenacc        additional [FCOPENACC] flags to be added: will
+                          prepend to [FCOPENACC]

 Some influential environment variables:
  FC          Fortran compiler command
@ -5825,7 +5856,7 @@ fi

 # we just gave the user the chance to append values to these variables

-###############################################################################
+


 if test -n "$ac_tool_prefix"; then
@ -7392,7 +7423,7 @@ fi
 ##############################################################################
 BASEMODNAME=psb_base_mod
 PRECMODNAME=psb_prec_mod
-METHDMODNAME=psb_krylov_mod
+METHDMODNAME=psb_linsolve_mod
 UTILMODNAME=psb_util_mod

 if test "X$psblas_cv_fc" == X"cray"
@ -7402,7 +7433,7 @@ then
   FIFLAG="-I"
   BASEMODNAME=PSB_BASE_MOD
   PRECMODNAME=PSB_PREC_MOD
-   METHDMODNAME=PSB_KRYLOV_MOD
+   METHDMODNAME=PSB_LINSOLVE_MOD
   UTILMODNAME=PSB_UTIL_MOD

 else
@ -10602,12 +10633,26 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
 fi


+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want cuda " >&5
+printf %s "checking whether we want cuda ... " >&6; }
+# Check whether --enable-cuda was given.
+if test ${enable_cuda+y}
+then :
+  enableval=$enable_cuda;
+pac_cv_cuda="yes";
+
+
+fi
+
+
+
+if test "x$pac_cv_cuda" == "xyes"; then


-# Check whether --with-cuda was given.
-if test ${with_cuda+y}
+# Check whether --with-cudadir was given.
+if test ${with_cudadir+y}
 then :
-  withval=$with_cuda; pac_cv_cuda_dir=$withval
+  withval=$with_cudadir; pac_cv_cuda_dir=$withval
 else $as_nop
  pac_cv_cuda_dir=''
 fi
@ -10693,10 +10738,10 @@ SAVE_LIBS="$LIBS"
 SAVE_CPPFLAGS="$CPPFLAGS"
 if test "x$pac_cv_have_cuda" == "x"; then

-# Check whether --with-cuda was given.
-if test ${with_cuda+y}
+# Check whether --with-cudadir was given.
+if test ${with_cudadir+y}
 then :
-  withval=$with_cuda; pac_cv_cuda_dir=$withval
+  withval=$with_cudadir; pac_cv_cuda_dir=$withval
 else $as_nop
  pac_cv_cuda_dir=''
 fi
@ -10814,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 LIBS="$SAVE_LIBS"
 CPPFLAGS="$SAVE_CPPFLAGS"

+      HAVE_CUDA="yes";
   CUDA_VERSION="$pac_cv_cuda_version";
   CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
-      HAVE_CUDA="yes";
+   CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
   SPGPU_LIBS="-lspgpu";
   CUDAD=cudad;
   CUDALD=cudald;
@ -10837,7 +10883,7 @@ fi


   if test "x$pac_cv_cudacc" == "x"; then
-      pac_cv_cudacc="50,60,70,75";
+      pac_cv_cudacc="50,60,70,75,80,86";
      CUDA_CC="$pac_cv_cudacc";
   fi
   if (( $pac_cv_cuda_version >= 11070  ))
@ -10849,24 +10895,432 @@ fi
 	CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
   done
   if test "x$pac_cv_cuda_version" != "xunknown"; then
-     CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
-     FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
-     CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+     CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
+     FCUDEFINES=" ${CUDA_DEFINES}"
+     CCUDEFINES=" ${CUDA_DEFINES}"
+     CXXCUDEFINES=" ${CUDA_DEFINES}"
   fi
 fi

 if test "x$pac_cv_ipk_size" != "x4"; then
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size,  disabling CUDA/SPGPU" >&5
 printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size,  disabling CUDA/SPGPU" >&6;}
+  pac_cv_cuda="no";
  HAVE_CUDA="no";
  CUDA_CC="";
  SPGPU_LIBS="";
  CUDAD="";
  CUDALD="";
  CUDEFINES="";
+  CUDA_DEFINES="";
  CUDA_INCLUDES="";
  CUDA_LIBS="";
+  FCUDEFINES="";
+  CCUDEFINES="";
+  CXXCUDEFINES="";
+fi
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
+printf %s "checking whether we want openacc ... " >&6; }
+# Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+  enableval=$enable_openacc;
+pac_cv_openacc="yes";
+
+
+fi
+
+if test x"$pac_cv_openacc" == x"yes" ; then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
+printf "%s\n" "yes." >&6; }
+#   AC_LANG_PUSH([Fortran])
+#   AC_OPENACC()
+#   pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
+#   AC_LANG_POP()
+#   AC_LANG_PUSH([C])
+#   AC_OPENACC()
+#   pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
+#   AC_LANG_POP()
+#   AC_LANG_PUSH([C++])
+#   AC_OPENACC()
+#   pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
+#   AC_LANG_POP()
+else
+ pac_cv_openacc="no";
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
+printf "%s\n" "no." >&6; }
+fi
+
+
+if test x"$pac_cv_openacc" == x"yes" ; then
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-extraopenacc was given.
+if test ${with_extraopenacc+y}
+then :
+  withval=$with_extraopenacc;
+EXTRAOPENACC="${withval} ${EXTRAOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5
+printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; }
+
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+      if test -e penmp || test -e mp; then
+  as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5
+fi
+# Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+  enableval=$enable_openacc;
+fi
+
+  OPENACC_CFLAGS=
+  if test "$enable_openacc" != no; then
+      ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5
+printf %s "checking for $CC option to support Openacc... " >&6; }
+if test ${ax_cv_prog_c_openacc+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ax_cv_prog_c_openacc='not found'
+                                                                        for ac_option in '' -fopenacc -openacc -acc; do
+
+        ac_save_CFLAGS=$CFLAGS
+        CFLAGS="$CFLAGS $ac_option"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+  int main (void) { acc_init (0); return 0;}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+ int main (void) { acc_init (0); return 0;}
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ax_cv_prog_c_openacc=$ac_option
+else $as_nop
+  ax_cv_prog_c_openacc='unsupported'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        CFLAGS=$ac_save_CFLAGS
+
+        if test "$ax_cv_prog_c_openacc" != 'not found'; then
+          break
+        fi
+      done
+      if test "$ax_cv_prog_c_openacc" = 'not found'; then
+        ac_cv_prog_c_openacc='unsupported'
+      elif test "$ax_cv_prog_c_openacc" = ''; then
+        ac_cv_prog_c_openacc='none needed'
+      fi
+                        rm -f penmp mp
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5
+printf "%s\n" "$ax_cv_prog_c_openacc" >&6; }
+    if test "$ax_cv_prog_c_openacc" != 'unsupported' && \
+       test "$ax_cv_prog_c_openacc" != 'none needed'; then
+      OPENACC_CFLAGS="$ax_cv_prog_c_openacc"
    fi
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+  fi
+
+   CCOPENACC="$ax_cv_prog_c_openacc";
+   # Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+  enableval=$enable_openacc;
+fi
+
+  OPENACC_CFLAGS=
+  if test "$enable_openacc" != no; then
+      ac_ext=cpp
+ac_cpp='$CXXCPP $CPPFLAGS'
+ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5
+printf %s "checking for $CXX option to support Openacc... " >&6; }
+if test ${ax_cv_prog_cxx_openacc+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ax_cv_prog_cxx_openacc='not found'
+                                                                        for ac_option in '' -fopenacc -openacc -acc; do
+
+        ac_save_CXXFLAGS=$CXXFLAGS
+        CXXFLAGS="$CXXFLAGS $ac_option"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+
+_ACEOF
+if ac_fn_cxx_try_compile "$LINENO"
+then :
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#ifndef _OPENACC
+#error "OpenACC not supported"
+#endif
+#include <openacc.h>
+ int main (void) { acc_init (acc_get_device_type()); return 0;}
+
+_ACEOF
+if ac_fn_cxx_try_link "$LINENO"
+then :
+  ax_cv_prog_cxx_openacc=$ac_option
+else $as_nop
+  ax_cv_prog_cxx_openacc='unsupported'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        CXXFLAGS=$ac_save_CXXFLAGS
+
+        if test "$ax_cv_prog_cxx_openacc" != 'not found'; then
+          break
+        fi
+      done
+      if test "$ax_cv_prog_cxx_openacc" = 'not found'; then
+        ac_cv_prog_cxx_openacc='unsupported'
+      elif test "$ax_cv_prog_cxx_openacc" = ''; then
+        ac_cv_prog_cxx_openacc='none needed'
+      fi
+                        rm -f penmp mp
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5
+printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; }
+    if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \
+       test "$ax_cv_prog_cxx_openacc" != 'none needed'; then
+      OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc"
+    fi
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+  fi
+
+   CXXOPENACC="$ax_cv_prog_cxx_openacc";
+   # Check whether --enable-openacc was given.
+if test ${enable_openacc+y}
+then :
+  enableval=$enable_openacc;
+fi
+
+  OPENACC_CFLAGS=
+  if test "$enable_openacc" != no; then
+      ac_ext=${ac_fc_srcext-f}
+ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
+ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_fc_compiler_gnu
+
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5
+printf %s "checking for $FC option to support Openacc... " >&6; }
+if test ${ax_cv_prog_fc_openacc+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ax_cv_prog_fc_openacc='not found'
+                                                                        for ac_option in '' -fopenacc -openacc -acc; do
+
+        ac_save_FCFLAGS=$FCFLAGS
+        FCFLAGS="$FCFLAGS $ac_option"
+        cat > conftest.$ac_ext <<_ACEOF
+
+      program main
+      use openacc
+      implicit none
+      integer tid, np
+      tid = 42
+      call acc_init(0)
+      end
+
+_ACEOF
+if ac_fn_fc_try_compile "$LINENO"
+then :
+  cat > conftest.$ac_ext <<_ACEOF
+
+      program main
+      use openacc
+      implicit none
+      integer tid, np
+      tid = 42
+      call acc_init(0)
+      end
+
+_ACEOF
+if ac_fn_fc_try_link "$LINENO"
+then :
+  ax_cv_prog_fc_openacc=$ac_option
+else $as_nop
+  ax_cv_prog_fc_openacc='unsupported'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        FCFLAGS=$ac_save_FCFLAGS
+
+        if test "$ax_cv_prog_fc_openacc" != 'unsupported'; then
+          break
+        fi
+      done
+      if test "$ax_cv_prog_fc_openacc" = 'not found'; then
+        ac_cv_prog_fc_openacc='unsupported'
+      elif test "$ax_cv_prog_fc_openacc" = ''; then
+        ac_cv_prog_fc_openacc='none needed'
+      fi
+                        rm -f penmp mp
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5
+printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; }
+    if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \
+       test "$ax_cv_prog_fc_openacc" != 'none needed'; then
+      OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc"
+    fi
+   ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+  fi
+
+   FCOPENACC="$ax_cv_prog_fc_openacc";
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-ccopenacc was given.
+if test ${with_ccopenacc+y}
+then :
+  withval=$with_ccopenacc;
+CCOPENACC="${withval} ${CCOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
+printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
+
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-cxxopenacc was given.
+if test ${with_cxxopenacc+y}
+then :
+  withval=$with_cxxopenacc;
+CXXOPENACC="${withval} ${CXXOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
+printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
+
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
+printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
+
+# Check whether --with-fcopenacc was given.
+if test ${with_fcopenacc+y}
+then :
+  withval=$with_fcopenacc;
+FCOPENACC="${withval} ${FCOPENACC}"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
+printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
+
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+
+fi
+
+
+   CCOPENACC="$CCOPENACC   $EXTRAOPENACC";
+   CXXOPENACC="$CXXOPENACC   $EXTRAOPENACC";
+   FCOPENACC="$FCOPENACC   $EXTRAOPENACC";
+   OACCD=oaccd;
+   OACCLD=oaccld;
+
+   #FCOPT="$FCOPT    $FCOPENACC";
+   #CCOPT="$CCOPT    $CCOPENACC"
+   #CXXOPT="$CXXOPT  $CXXOPENACC"
+   #FLINK="$FLINK $FCOPENACC";
+fi
+
+###############################################################################
+LIBRSB_DIR="$pac_cv_librsb_dir";
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5
+printf %s "checking for LIBRSB install dir... " >&6; }
+case $LIBRSB_DIR in
+     /*) ;;
+     *)  esac
+pac_cv_status_file="$LIBRSB_DIR/librsb.a"
+if test ! -f "$pac_cv_status_file" ; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+  #AC_MSG_ERROR([Could not find an installation  in $LIBRSB_DIR.])
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5
+printf "%s\n" "$LIBRSB_DIR" >&6; }
+  RSBTARGETLIB=rsbd;
+  RSBTARGETOBJ=rsbobj;
+fi
+



@ -10878,7 +11332,7 @@ fi
 LIBDIR=lib
 BASELIBNAME=libpsb_base.a
 PRECLIBNAME=libpsb_prec.a
-METHDLIBNAME=libpsb_krylov.a
+METHDLIBNAME=libpsb_linsolve.a
 UTILLIBNAME=libpsb_util.a

 ###############################################################################
@ -10928,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a

 PSBLASRULES='
 PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
-CXXDEFINES=$(PSBCXXDEFINES)  $(CUDA_DEFINES)
-CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
-FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+CXXDEFINES=$(PSBCXXDEFINES)
+CDEFINES=$(PSBCDEFINES)
+FDEFINES=$(PSBFDEFINES)


 # These should be portable rules, arent they?
@ -10961,6 +11415,22 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)



+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+



@ -12299,6 +12769,11 @@ fi
 	CUDA 		      : ${HAVE_CUDA}
 	CUDA_CC               : ${pac_cv_cudacc}

+	OPENACC		      : ${pac_cv_openacc}
+        FCOPENACC             : ${FCOPENACC}
+	OACCD                 : ${OACCD}
+	OACCLD                : ${OACCLD}
+
 	BLAS                  : ${BLAS_LIBS}

 	METIS usable          : ${psblas_cv_have_metis}
@ -12332,6 +12807,11 @@ printf "%s\n" "$as_me:
 	CUDA 		      : ${HAVE_CUDA}
 	CUDA_CC               : ${pac_cv_cudacc}

+	OPENACC		      : ${pac_cv_openacc}
+        FCOPENACC             : ${FCOPENACC}
+	OACCD                 : ${OACCD}
+	OACCLD                : ${OACCLD}
+
 	BLAS                  : ${BLAS_LIBS}

 	METIS usable          : ${psblas_cv_have_metis}
@ -12348,6 +12828,16 @@ printf "%s\n" "$as_me:
 	If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise
 	type  ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}.
 " >&6;}
+# Warn when OpenACC is enabled but no Fortran OpenACC flag ended up selected.
+# FCOPENACC is assembled earlier as "$FCOPENACC   $EXTRAOPENACC", so it contains
+# blanks even when it carries no flag at all; strip whitespace before comparing
+# against the empty string, otherwise this warning can never be printed.
+if test x"${pac_cv_openacc}" == x"yes" ; then
+   if test x"$(printf '%s' "${FCOPENACC}" | tr -d ' \t\n')" == x ; then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}:
+   WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
+   You may want to rerun configure with --with-fcopenacc= " >&5
+printf "%s\n" "$as_me:
+   WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
+   You may want to rerun configure with --with-fcopenacc= " >&6;}
+  fi
+fi

 ###############################################################################

--- a/configure.ac
+++ b/configure.ac
@ -483,7 +483,7 @@ fi
 ##############################################################################
 BASEMODNAME=psb_base_mod
 PRECMODNAME=psb_prec_mod
-METHDMODNAME=psb_krylov_mod
+METHDMODNAME=psb_linsolve_mod
 UTILMODNAME=psb_util_mod

 if test "X$psblas_cv_fc" == X"cray" 
@ -493,7 +493,7 @@ then
   FIFLAG="-I"
   BASEMODNAME=PSB_BASE_MOD
   PRECMODNAME=PSB_PREC_MOD
-   METHDMODNAME=PSB_KRYLOV_MOD
+   METHDMODNAME=PSB_LINSOLVE_MOD
   UTILMODNAME=PSB_UTIL_MOD

 else 
@ -795,16 +795,19 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
 fi


+PAC_ARG_CUDA()
+if test "x$pac_cv_cuda" == "xyes"; then

 PAC_CHECK_CUDA()

 if test "x$pac_cv_have_cuda" == "xyes"; then
   
   PAC_CHECK_CUDA_VERSION()
-   CUDA_VERSION="$pac_cv_cuda_version";
-   CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
   dnl PAC_CHECK_SPGPU()
   HAVE_CUDA="yes";
+   CUDA_VERSION="$pac_cv_cuda_version";
+   CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
+   CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
   SPGPU_LIBS="-lspgpu";	
   CUDAD=cudad;
   CUDALD=cudald;
@ -814,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then

   PAC_ARG_WITH_CUDACC()
   if test "x$pac_cv_cudacc" == "x"; then
-      pac_cv_cudacc="50,60,70,75";
+      pac_cv_cudacc="50,60,70,75,80,86";
      CUDA_CC="$pac_cv_cudacc";
   fi
   if (( $pac_cv_cuda_version >= 11070  ))
@ -826,27 +829,72 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
 	CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
   done
   if test "x$pac_cv_cuda_version" != "xunknown"; then
-     CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
-     FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
-     CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"    
+     CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
+     FCUDEFINES=" ${CUDA_DEFINES}"
+     CCUDEFINES=" ${CUDA_DEFINES}"
+     CXXCUDEFINES=" ${CUDA_DEFINES}"	
   fi
 fi

 if test "x$pac_cv_ipk_size" != "x4"; then
  AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size,  disabling CUDA/SPGPU])
+  pac_cv_cuda="no";
  HAVE_CUDA="no";
  CUDA_CC="";	
  SPGPU_LIBS="";	
  CUDAD="";
  CUDALD="";
  CUDEFINES="";
+  CUDA_DEFINES="";
  CUDA_INCLUDES="";
  CUDA_LIBS="";
+  FCUDEFINES="";
+  CCUDEFINES="";
+  CXXCUDEFINES="";
+fi
 fi

+PAC_ARG_OPENACC()
+dnl AC_ARG_ENABLE([openacc],
+dnl    [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])
+if test x"$pac_cv_openacc" == x"yes" ; then
+   PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC)
+   dnl if test false; then
+   dnl Each AX_*_OPENACC probe leaves its outcome in ax_cv_prog_*_openacc: either
+   dnl a usable option (possibly empty) or one of the status strings
+   dnl "not found", "unsupported", "none needed".  Status strings are not
+   dnl compiler flags, so they are mapped to an empty flag set here; the
+   dnl --with-*openacc options below can still supply flags by hand.
+   AX_C_OPENACC()
+   case "$ax_cv_prog_c_openacc" in
+     "not found"|unsupported|"none needed") CCOPENACC="" ;;
+     *) CCOPENACC="$ax_cv_prog_c_openacc" ;;
+   esac
+   AX_CXX_OPENACC()
+   case "$ax_cv_prog_cxx_openacc" in
+     "not found"|unsupported|"none needed") CXXOPENACC="" ;;
+     *) CXXOPENACC="$ax_cv_prog_cxx_openacc" ;;
+   esac
+   AX_FC_OPENACC()
+   case "$ax_cv_prog_fc_openacc" in
+     "not found"|unsupported|"none needed") FCOPENACC="" ;;
+     *) FCOPENACC="$ax_cv_prog_fc_openacc" ;;
+   esac
+    dnl AX_OPENACC()
+    dnl 
+    dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";	
+    dnl FCOPENACC="$ax_cv_prog_fc_openacc";   
+   dnl else   
+dnl   AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])	   
+   PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)	
+   PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
+   PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
+dnl   AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])		
+dnl   CCOPENACC="$ax_cv_prog_c_openacc";
+dnl   CXXOPENACC="$ax_cv_prog_cxx_openacc";	
+dnl   FCOPENACC="$ax_cv_prog_fc_openacc";   
+dnl   fi
+   CCOPENACC="$CCOPENACC   $EXTRAOPENACC";
+   CXXOPENACC="$CXXOPENACC   $EXTRAOPENACC";
+   FCOPENACC="$FCOPENACC   $EXTRAOPENACC";
+dnl   AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])	   
+   OACCD=oaccd;
+   OACCLD=oaccld;
+
+   #FCOPT="$FCOPT    $FCOPENACC";
+   #CCOPT="$CCOPT    $CCOPENACC"	
+   #CXXOPT="$CXXOPT  $CXXOPENACC"		
+   #FLINK="$FLINK $FCOPENACC";
+fi

 ###############################################################################
-PAC_ARG_WITH_LIBRSB
+dnl PAC_ARG_WITH_LIBRSB()
 LIBRSB_DIR="$pac_cv_librsb_dir";
 AC_MSG_CHECKING([for LIBRSB install dir])
 case $LIBRSB_DIR in 
@ -878,7 +926,7 @@ fi
 LIBDIR=lib
 BASELIBNAME=libpsb_base.a
 PRECLIBNAME=libpsb_prec.a
-METHDLIBNAME=libpsb_krylov.a
+METHDLIBNAME=libpsb_linsolve.a
 UTILLIBNAME=libpsb_util.a

 ###############################################################################
@ -928,9 +976,9 @@ AC_SUBST(FINCLUDES)

 PSBLASRULES='
 PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
-CXXDEFINES=$(PSBCXXDEFINES)  $(CUDA_DEFINES)
-CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
-FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+CXXDEFINES=$(PSBCXXDEFINES) 
+CDEFINES=$(PSBCDEFINES) 
+FDEFINES=$(PSBFDEFINES) 
 	

 # These should be portable rules, arent they?
@ -952,6 +1000,12 @@ AC_SUBST(PRECLIBNAME)
 AC_SUBST(METHDLIBNAME)
 AC_SUBST(UTILLIBNAME)
 AC_SUBST(METISINCFILE)
+AC_SUBST(OACCD)
+AC_SUBST(OACCLD)
+AC_SUBST(FCOPENACC)
+AC_SUBST(CCOPENACC)
+AC_SUBST(CXXOPENACC)
+
 AC_SUBST(SPGPU_FLAGS)
 AC_SUBST(SPGPU_LIBS)
 dnl AC_SUBST(SPGPU_DIR)
@ -960,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
 AC_SUBST(EXTRALDLIBS)
 AC_SUBST(CUDA_DIR)
 AC_SUBST(CUDA_DEFINES)
+AC_SUBST(FCUDEFINES)
+AC_SUBST(CCUDEFINES)
+AC_SUBST(CXXCUDEFINES)	
 AC_SUBST(CUDA_INCLUDES)
 AC_SUBST(CUDA_LIBS)
 AC_SUBST(CUDA_VERSION)
@ -976,7 +1033,6 @@ AC_SUBST(LIBRSB_DIR)
 AC_SUBST(LIBRSB_DEFINES)
 AC_SUBST(LRSB)

-
 ###############################################################################
 # the following files will be created by Automake

@ -1004,6 +1060,11 @@ AC_MSG_NOTICE([
 	CUDA 		      : ${HAVE_CUDA}
 	CUDA_CC               : ${pac_cv_cudacc}

+	OPENACC		      : ${pac_cv_openacc}
+        FCOPENACC             : ${FCOPENACC}
+	OACCD                 : ${OACCD}
+	OACCLD                : ${OACCLD}
+	
 	BLAS                  : ${BLAS_LIBS}

 	METIS usable          : ${psblas_cv_have_metis}
@ -1023,6 +1084,13 @@ dnl Note : we should use LDLIBS sooner or later!
 dnl	To install the program and its documentation, run 'make install' if you are root,
 dnl	or run 'su -c "make install"' if you are not root.
 ])
+dnl Warn when OpenACC is enabled but no Fortran OpenACC flag ended up selected.
+dnl FCOPENACC is assembled earlier as "$FCOPENACC   $EXTRAOPENACC", so it holds
+dnl blanks even when it carries no flag at all; strip whitespace before testing
+dnl for the empty string, otherwise this warning can never be printed.
+if test x"${pac_cv_openacc}" == x"yes" ; then
+   if test x"$(printf '%s' "${FCOPENACC}" | tr -d ' \t\n')" == x ; then
+   AC_MSG_NOTICE([
+   WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
+   You may want to rerun configure with --with-fcopenacc= ])
+  fi
+fi

 ###############################################################################

--- a/cuda/Makefile
+++ b/cuda/Makefile
@ -141,3 +141,12 @@ spgpuclean:
 	$(MAKE) -C spgpu clean

 veryclean:  clean
+
+# Suffix rules for this directory: each compile command adds the per-language
+# CUDA define set (CCUDEFINES / FCUDEFINES / CXXCUDEFINES) to the usual
+# option, include and define variables.
+# C sources.
+.c.o:
+	$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+# Fortran sources with the .f90 suffix (FDEFINES is not passed here).
+.f90.o:
+	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
+# Fortran sources with the .F90 suffix (FDEFINES is passed as well).
+.F90.o:
+	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+# C++ sources.
+.cpp.o:
+	$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
 }


-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
  return(i);
 }

-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
  return(i);
 }

-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
--- a/cuda/cvectordev.h
+++ b/cuda/cvectordev.h
@ -37,6 +37,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"

 int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
 int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
 			      int hfirst, void* host_values, int indexBase, cuFloatComplex beta);

 int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
 int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);

 int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
--- a/cuda/dnsdev.c
+++ b/cuda/dnsdev.c
@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
    status = cublasCgemv(handle, trans, m,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,1,
-			 beta,  y->v_,1);
+			 (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
+			 (const cuComplex *) beta,  y->v_,1);
  } else {
    status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
-			 beta,  y->v_,y->pitch_);
+			 (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+			 (const cuComplex *) beta,  y->v_,y->pitch_);
  }    
  
  if (status == CUBLAS_STATUS_SUCCESS)  
@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
    status = cublasZgemv(handle, trans, m,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,1,
-			 beta,  y->v_,1);
+			 (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
+			 (const cuDoubleComplex *) beta,  y->v_,1);
  } else {
    status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
-			 beta,  y->v_,y->pitch_);
+			 (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+			 (const cuDoubleComplex *) beta,  y->v_,y->pitch_);
  }    
  
  if (status == CUBLAS_STATUS_SUCCESS)  
--- a/cuda/dvectordev.c
+++ b/cuda/dvectordev.c
@ -220,7 +220,8 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
  spgpuHandle_t handle=psb_cudaGetHandle();

-  spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+  spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,
+	     devVecA->count_,devVecB->pitch_);
  return(i);
 }

--- a/cuda/dvectordev.h
+++ b/cuda/dvectordev.h
@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"

 int registerMappedDouble(void *, void **, int, double);
 int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
--- a/cuda/fcusparse_fct.h
+++ b/cuda/fcusparse_fct.h
@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
  T_CSRGDeviceMat *cMat= Matrix->mat;
  cusparseFillMode_t  mode=type;

-  CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+  CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
 					   CUSPARSE_SPMAT_FILL_MODE,
 					   (const void*) &mode,
 					   sizeof(cusparseFillMode_t)));
@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
 {
  T_CSRGDeviceMat *cMat= Matrix->mat;
  cusparseDiagType_t  cutype=type;
-  CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+  CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
 					   CUSPARSE_SPMAT_DIAG_TYPE,
 					   (const void*) &cutype,
 					   sizeof(cusparseDiagType_t)));
--- a/cuda/impl/Makefile
+++ b/cuda/impl/Makefile
@ -295,3 +295,12 @@ lib:  objs

 clean:
 	/bin/rm -f $(OBJS)
+
+# Suffix rules for this directory: each compile command adds the per-language
+# CUDA define set (CCUDEFINES / FCUDEFINES / CXXCUDEFINES) to the usual
+# option, include and define variables.
+# C sources.
+.c.o:
+	$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
+# Fortran sources with the .f90 suffix (FDEFINES is not passed here).
+.f90.o:
+	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
+# Fortran sources with the .F90 suffix (FDEFINES is passed as well).
+.F90.o:
+	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
+# C++ sources.
+.cpp.o:
+	$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
--- a/cuda/ivectordev.h
+++ b/cuda/ivectordev.h
@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"

 int registerMappedInt(void *, void **, int, int);
 int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
    
  end subroutine c_cuda_set_scal
-!!$
-!!$  subroutine c_cuda_set_vect(x,val)
-!!$    class(psb_c_vect_cuda), intent(inout) :: x
-!!$    complex(psb_spk_), intent(in)           :: val(:)
-!!$    integer(psb_ipk_) :: nr
-!!$    integer(psb_ipk_) :: info
-!!$
-!!$    if (x%is_dev()) call x%sync()
-!!$    call x%psb_c_base_vect_type%set_vect(val)
-!!$    call x%set_host()
-!!$
-!!$  end subroutine c_cuda_set_vect



@ -834,7 +822,6 @@ contains
    class(psb_c_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    complex(psb_spk_)                :: res
-    complex(psb_spk_), external      :: ddot
    integer(psb_ipk_) :: info
    
    res = czero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_c_vect
    !
    select type(yy => y)
-    type is (psb_c_base_vect_type)
-      if (x%is_dev()) call x%sync()
-      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_c_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains

    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select

@ -870,10 +854,10 @@ contains
    complex(psb_spk_), intent(in)           :: y(:)
    integer(psb_ipk_), intent(in)        :: n
    complex(psb_spk_)                :: res
-    complex(psb_spk_), external      :: ddot
+    complex(psb_spk_), external      :: cdot
    
    if (x%is_dev()) call x%sync()
-    res = ddot(n,y,1,x%v,1)
+    res = cdot(n,y,1,x%v,1)

  end function c_cuda_dot_a
    
@ -1393,14 +1377,14 @@ module psb_c_cuda_multivect_mod
  end type psb_c_multivect_cuda

  public  :: psb_c_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_c_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface

 contains
  
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    complex(psb_spk_)       :: x(:,:)
    type(psb_c_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)

-  end function constructor
+  end function mconstructor
    

 !!$  subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_d_cuda_vect_mod.F90
+++ b/cuda/psb_d_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
    
  end subroutine d_cuda_set_scal
-!!$
-!!$  subroutine d_cuda_set_vect(x,val)
-!!$    class(psb_d_vect_cuda), intent(inout) :: x
-!!$    real(psb_dpk_), intent(in)           :: val(:)
-!!$    integer(psb_ipk_) :: nr
-!!$    integer(psb_ipk_) :: info
-!!$
-!!$    if (x%is_dev()) call x%sync()
-!!$    call x%psb_d_base_vect_type%set_vect(val)
-!!$    call x%set_host()
-!!$
-!!$  end subroutine d_cuda_set_vect



@ -834,7 +822,6 @@ contains
    class(psb_d_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    real(psb_dpk_)                :: res
-    real(psb_dpk_), external      :: ddot
    integer(psb_ipk_) :: info
    
    res = dzero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_d_vect
    !
    select type(yy => y)
-    type is (psb_d_base_vect_type)
-      if (x%is_dev()) call x%sync()
-      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_d_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains

    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select

@ -1393,14 +1377,14 @@ module psb_d_cuda_multivect_mod
  end type psb_d_multivect_cuda

  public  :: psb_d_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_d_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface

 contains
  
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    real(psb_dpk_)       :: x(:,:)
    type(psb_d_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)

-  end function constructor
+  end function mconstructor
    

 !!$  subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_i_cuda_vect_mod.F90
+++ b/cuda/psb_i_cuda_vect_mod.F90
@ -795,18 +795,6 @@ contains
    call x%set_dev()
    
  end subroutine i_cuda_set_scal
-!!$
-!!$  subroutine i_cuda_set_vect(x,val)
-!!$    class(psb_i_vect_cuda), intent(inout) :: x
-!!$    integer(psb_ipk_), intent(in)           :: val(:)
-!!$    integer(psb_ipk_) :: nr
-!!$    integer(psb_ipk_) :: info
-!!$
-!!$    if (x%is_dev()) call x%sync()
-!!$    call x%psb_i_base_vect_type%set_vect(val)
-!!$    call x%set_host()
-!!$
-!!$  end subroutine i_cuda_set_vect



@ -949,14 +937,14 @@ module psb_i_cuda_multivect_mod
  end type psb_i_multivect_cuda

  public  :: psb_i_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_i_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface

 contains
  
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    integer(psb_ipk_)       :: x(:,:)
    type(psb_i_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -964,7 +952,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)

-  end function constructor
+  end function mconstructor
    

 !!$  subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_s_cuda_vect_mod.F90
+++ b/cuda/psb_s_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
    
  end subroutine s_cuda_set_scal
-!!$
-!!$  subroutine s_cuda_set_vect(x,val)
-!!$    class(psb_s_vect_cuda), intent(inout) :: x
-!!$    real(psb_spk_), intent(in)           :: val(:)
-!!$    integer(psb_ipk_) :: nr
-!!$    integer(psb_ipk_) :: info
-!!$
-!!$    if (x%is_dev()) call x%sync()
-!!$    call x%psb_s_base_vect_type%set_vect(val)
-!!$    call x%set_host()
-!!$
-!!$  end subroutine s_cuda_set_vect



@ -834,7 +822,6 @@ contains
    class(psb_s_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    real(psb_spk_)                :: res
-    real(psb_spk_), external      :: ddot
    integer(psb_ipk_) :: info
    
    res = szero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_s_vect
    !
    select type(yy => y)
-    type is (psb_s_base_vect_type)
-      if (x%is_dev()) call x%sync()
-      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_s_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains

    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select

@ -870,10 +854,10 @@ contains
    real(psb_spk_), intent(in)           :: y(:)
    integer(psb_ipk_), intent(in)        :: n
    real(psb_spk_)                :: res
-    real(psb_spk_), external      :: ddot
+    real(psb_spk_), external      :: sdot
    
    if (x%is_dev()) call x%sync()
-    res = ddot(n,y,1,x%v,1)
+    res = sdot(n,y,1,x%v,1)

  end function s_cuda_dot_a
    
@ -1393,14 +1377,14 @@ module psb_s_cuda_multivect_mod
  end type psb_s_multivect_cuda

  public  :: psb_s_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_s_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface

 contains
  
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    real(psb_spk_)       :: x(:,:)
    type(psb_s_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)

-  end function constructor
+  end function mconstructor
    

 !!$  subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_z_cuda_vect_mod.F90
+++ b/cuda/psb_z_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
    
  end subroutine z_cuda_set_scal
-!!$
-!!$  subroutine z_cuda_set_vect(x,val)
-!!$    class(psb_z_vect_cuda), intent(inout) :: x
-!!$    complex(psb_dpk_), intent(in)           :: val(:)
-!!$    integer(psb_ipk_) :: nr
-!!$    integer(psb_ipk_) :: info
-!!$
-!!$    if (x%is_dev()) call x%sync()
-!!$    call x%psb_z_base_vect_type%set_vect(val)
-!!$    call x%set_host()
-!!$
-!!$  end subroutine z_cuda_set_vect



@ -834,7 +822,6 @@ contains
    class(psb_z_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    complex(psb_dpk_)                :: res
-    complex(psb_dpk_), external      :: ddot
    integer(psb_ipk_) :: info
    
    res = zzero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_z_vect
    !
    select type(yy => y)
-    type is (psb_z_base_vect_type)
-      if (x%is_dev()) call x%sync()
-      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_z_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains

    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select

@ -870,10 +854,10 @@ contains
    complex(psb_dpk_), intent(in)           :: y(:)
    integer(psb_ipk_), intent(in)        :: n
    complex(psb_dpk_)                :: res
-    complex(psb_dpk_), external      :: ddot
+    complex(psb_dpk_), external      :: zdot
    
    if (x%is_dev()) call x%sync()
-    res = ddot(n,y,1,x%v,1)
+    res = zdot(n,y,1,x%v,1)

  end function z_cuda_dot_a
    
@ -1393,14 +1377,14 @@ module psb_z_cuda_multivect_mod
  end type psb_z_multivect_cuda

  public  :: psb_z_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_z_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface

 contains
  
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    complex(psb_dpk_)       :: x(:,:)
    type(psb_z_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)

-  end function constructor
+  end function mconstructor
    

 !!$  subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/spgpu/kernels/sdot.cu
+++ b/cuda/spgpu/kernels/sdot.cu
@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
 	{
 #endif	

-
 #ifdef ASSUME_LOCK_SYNC_PARALLELISM
 		volatile float* vsSum = sSum;
 		vsSum[threadIdx.x] = res;
--- a/cuda/svectordev.c
+++ b/cuda/svectordev.c
@ -220,7 +220,8 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
  spgpuHandle_t handle=psb_cudaGetHandle();

-  spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+  spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,
+	     devVecA->count_,devVecB->pitch_);
  return(i);
 }

--- a/cuda/svectordev.h
+++ b/cuda/svectordev.h
@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"

 int registerMappedFloat(void *, void **, int, float);
 int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);
--- a/cuda/vectordev.h
+++ b/cuda/vectordev.h
@ -34,6 +34,7 @@
 #include "cuda_runtime.h"
 //#include "common.h"
 //#include "cintrf.h"
+#include "cuda_util.h"
 #include <complex.h>

 struct MultiVectDevice
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 }


-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
  return(i);
 }

-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
  return(i);
 }

-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -223,7 +223,8 @@ int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
  return(i);
 }

-int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB)
+int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
+				   void* devMultiVecA, void* devMultiVecB)
 {int i=0;
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
--- a/cuda/zvectordev.h
+++ b/cuda/zvectordev.h
@ -37,6 +37,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
+#include "vector.h"

 int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
 int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 				     int indexBase, cuDoubleComplex beta);

 int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
 int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, 
 				   void* devVecA, void* devVecB);

--- a/docs/Makefile
+++ b/docs/Makefile
@ -1,7 +1,7 @@
 all: guide 

 guide:
-	cd src && $(MAKE)
+	cd src && $(MAKE)  clean all 

 doxy: 
 	doxygen doxypsb
--- a/docs/doxypsb
+++ b/docs/doxypsb
@ -52,7 +52,7 @@ PROJECT_LOGO           =
 # If a relative path is entered, it will be relative to the location
 # where doxygen was started. If left blank the current directory will be used.

-OUTPUT_DIRECTORY       = ../../psblas-3.4-doxygen
+OUTPUT_DIRECTORY       = ../../psblas-3.9-doxygen

 # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
 # 4096 sub-directories (in 2 levels) under the output directory of each output
--- a/docs/html/dia-.png
+++ b/docs/html/dia-.png
--- a/docs/html/ell-.png
+++ b/docs/html/ell-.png
--- a/docs/html/hdia-.png
+++ b/docs/html/hdia-.png
--- a/docs/html/hll-.png
+++ b/docs/html/hll-.png
--- a/docs/html/index.html
+++ b/docs/html/index.html
@ -10,16 +10,16 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-<!--l. 91--><p class="noindent" ><span 
-class="cmbx-12x-x-144">PSBLAS</span><br 
+<!--l. 99--><p class="noindent" ><span 
+class="pplb7t-x-x-172">PSBLAS</span><br 
 class="newline" /> <span 
-class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br 
+class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br 
 class="newline" /> <span 
-class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br 
+class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br 
 class="newline" /> <span 
-class="cmbx-10">Salvatore Filippone</span><br 
+class="pplb7t-">Salvatore Filippone</span><br 
 class="newline" /><span 
-class="cmbx-10">Alfredo Buttari  </span><br 
+class="pplb7t-">Alfredo Buttari  </span><br 
 class="newline" />Software version: 3.9.0<br 
 class="newline" />Aug 1st, 2024
                                                                  
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
 <br />   &#x00A0;<span class="sectionToc" >10 <a 
 href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
 <br />   &#x00A0;<span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
+href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
 <br />   &#x00A0;<span class="sectionToc" >12 <a 
-href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
+href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
 <br />   &#x00A0;<span class="sectionToc" >13 <a 
-href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
+href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
 <br />   &#x00A0;<span class="likesectionToc" ><a 
-href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
+href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
   </div>


--- a/docs/html/mat-.png
+++ b/docs/html/mat-.png
--- a/docs/html/userhtml.css
+++ b/docs/html/userhtml.css
@ -1,33 +1,62 @@
 
 /* start css.sty */
-.cmr-7{font-size:70%;}
-.cmmi-5{font-size:50%;font-style: italic;}
-.cmmi-7{font-size:70%;font-style: italic;}
-.cmmi-10{font-style: italic;}
-.cmsy-7{font-size:70%;}
-.cmbx-12x-x-144{font-size:172%; font-weight: bold;}
-.cmbx-12x-x-144{ font-weight: bold;}
-.cmbx-12x-x-144{ font-weight: bold;}
-.cmti-10{ font-style: italic;}
-.cmti-12{font-size:120%; font-style: italic;}
-.cmbx-10{ font-weight: bold;}
-.cmbx-10{ font-weight: bold;}
-.cmbx-10{ font-weight: bold;}
+.pplb7t-x-x-172{font-size:172%;font-weight: bold;}
+.pplb7t-x-x-172{font-weight: bold;}
+.pplb7t-x-x-172{font-weight: bold;}
+.pplri7t-{font-style: italic;}
+.pplri7t-{font-style: italic;}
+.pplri7t-x-x-120{font-size:120%;font-style: italic;}
+.pplri7t-x-x-120{font-style: italic;}
+.pplb7t-{font-weight: bold;}
+.pplb7t-{font-weight: bold;}
+.pplb7t-{font-weight: bold;}
 .cmtt-10{font-family: monospace,monospace;}
 .cmtt-10{font-family: monospace,monospace;}
 .cmtt-10{font-family: monospace,monospace;}
-.cmr-9{font-size:90%;}
-.cmr-8{font-size:80%;}
-.cmbx-12{font-size:120%; font-weight: bold;}
-.cmbx-12{ font-weight: bold;}
-.cmbx-12{ font-weight: bold;}
+.pplr7t-x-x-76{font-size:76%;}
+.zplmr7m-{font-style: italic;}
+.zplmr7m-{font-style: italic;}
+.zplmr7m-{font-style: italic;}
+.zplmr7m-{font-style: italic;}
+.zplmr7m-{font-style: italic;}
+.zplmr7m-x-x-76{font-size:76%;font-style: italic;}
+.zplmr7m-x-x-76{font-style: italic;}
+.zplmr7m-x-x-76{font-style: italic;}
+.zplmr7m-x-x-76{font-style: italic;}
+.zplmr7m-x-x-76{font-style: italic;}
+.zplmr7m-x-x-60{font-size:60%;font-style: italic;}
+.zplmr7m-x-x-60{font-style: italic;}
+.zplmr7m-x-x-60{font-style: italic;}
+.zplmr7m-x-x-60{font-style: italic;}
+.zplmr7m-x-x-60{font-style: italic;}
+.zplmr7y-x-x-76{font-size:76%;}
+.zplmr7t-x-x-76{font-size:76%;}
+.pplr7t-x-x-90{font-size:90%;}
+.pplr7t-x-x-80{font-size:80%;}
+.pplb7t-x-x-120{font-size:120%;font-weight: bold;}
+.pplb7t-x-x-120{font-weight: bold;}
+.pplb7t-x-x-120{font-weight: bold;}
 .cmtt-8{font-size:80%;font-family: monospace,monospace;}
 .cmtt-8{font-family: monospace,monospace;}
 .cmtt-8{font-family: monospace,monospace;}
 .cmtt-9{font-size:90%;font-family: monospace,monospace;}
 .cmtt-9{font-family: monospace,monospace;}
 .cmtt-9{font-family: monospace,monospace;}
-.cmmi-8{font-size:80%;font-style: italic;}
+.pplr7t-x-x-70{font-size:70%;}
+.zplmr7m-x-x-90{font-size:90%;font-style: italic;}
+.zplmr7m-x-x-90{font-style: italic;}
+.zplmr7m-x-x-90{font-style: italic;}
+.zplmr7m-x-x-90{font-style: italic;}
+.zplmr7m-x-x-90{font-style: italic;}
+.zplmr7y-x-x-90{font-size:90%;}
+.zplmr7m-x-x-80{font-size:80%;font-style: italic;}
+.zplmr7m-x-x-80{font-style: italic;}
+.zplmr7m-x-x-80{font-style: italic;}
+.zplmr7m-x-x-80{font-style: italic;}
+.zplmr7m-x-x-80{font-style: italic;}
+.zplmr7t-x-x-80{font-size:80%;}
+.pplrc7t-x-x-90{font-size:90%;}
+.small-caps{font-variant: small-caps; }
 p{margin-top:0;margin-bottom:0}
 p.indent{text-indent:0;}
 p + p{margin-top:1em;}
@ -158,5 +187,11 @@ pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top
 pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
 pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
 .lstinputlisting .label{margin-right:0.5em;}
+#TBL-24-1{border-left: 1px solid black;}
+#TBL-24-1{border-right:1px solid black;}
+#TBL-24-2{border-right:1px solid black;}
+#TBL-24-3{border-right:1px solid black;}
+#TBL-24-4{border-right:1px solid black;}
+#TBL-24-5{border-right:1px solid black;}
 /* end css.sty */

--- a/docs/html/userhtml.html
+++ b/docs/html/userhtml.html
@ -10,16 +10,16 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-<!--l. 91--><p class="noindent" ><span 
-class="cmbx-12x-x-144">PSBLAS</span><br 
+<!--l. 99--><p class="noindent" ><span 
+class="pplb7t-x-x-172">PSBLAS</span><br 
 class="newline" /> <span 
-class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br 
+class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br 
 class="newline" /> <span 
-class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br 
+class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br 
 class="newline" /> <span 
-class="cmbx-10">Salvatore Filippone</span><br 
+class="pplb7t-">Salvatore Filippone</span><br 
 class="newline" /><span 
-class="cmbx-10">Alfredo Buttari  </span><br 
+class="pplb7t-">Alfredo Buttari  </span><br 
 class="newline" />Software version: 3.9.0<br 
 class="newline" />Aug 1st, 2024
                                                                  
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
 <br />   &#x00A0;<span class="sectionToc" >10 <a 
 href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
 <br />   &#x00A0;<span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
+href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
 <br />   &#x00A0;<span class="sectionToc" >12 <a 
-href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
+href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
 <br />   &#x00A0;<span class="sectionToc" >13 <a 
-href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
+href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
 <br />   &#x00A0;<span class="likesectionToc" ><a 
-href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
+href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
   </div>


--- a/docs/html/userhtml0x.png
+++ b/docs/html/userhtml0x.png
--- a/docs/html/userhtml10x.png
+++ b/docs/html/userhtml10x.png
--- a/docs/html/userhtml11x.png
+++ b/docs/html/userhtml11x.png
--- a/docs/html/userhtml12x.png
+++ b/docs/html/userhtml12x.png
--- a/docs/html/userhtml13x.png
+++ b/docs/html/userhtml13x.png
--- a/docs/html/userhtml14x.png
+++ b/docs/html/userhtml14x.png
--- a/docs/html/userhtml15x.png
+++ b/docs/html/userhtml15x.png
--- a/docs/html/userhtml16.html
+++ b/docs/html/userhtml16.html
@ -14,6 +14,6 @@
  <!--l. 72--><p class="indent" >       <span class="footnote-mark"><a 
 id="fn4x0"><a 
 id="x16-136002x10.1"></a>    <sup class="textsuperscript">4</sup></a></span><span 
-class="cmr-8">The string is case-insensitive</span></div>
+class="pplr7t-x-x-80">The string is case-insensitive</span></div>
       
 </body></html> 
--- a/docs/html/userhtml16x.png
+++ b/docs/html/userhtml16x.png
--- a/docs/html/userhtml17x.png
+++ b/docs/html/userhtml17x.png
--- a/docs/html/userhtml18.html
+++ b/docs/html/userhtml18.html
@ -13,8 +13,11 @@
  <div class="footnote-text">
  <!--l. 53--><p class="noindent" ><span class="footnote-mark"><a 
 id="fn5x0"><a 
- id="x18-143004x11.1"></a>   <sup class="textsuperscript">5</sup></a></span><span 
-class="cmr-8">Note: the implementation is for </span><span 
-class="cmmi-8">FCG</span><span 
-class="cmr-8">(1).</span></div> 
+ id="x18-144004x11.1"></a>    <sup class="textsuperscript">5</sup></a></span><span 
+class="pplr7t-x-x-80">Note: the implementation is for </span><span 
+class="zplmr7m-x-x-80">FCG</span><span 
+class="zplmr7t-x-x-80">(</span><span 
+class="pplr7t-x-x-80">1</span><span 
+class="zplmr7t-x-x-80">)</span><span 
+class="pplr7t-x-x-80">.</span></div> 
 </body></html> 
--- a/docs/html/userhtml18x.png
+++ b/docs/html/userhtml18x.png
--- a/docs/html/userhtml19x.png
+++ b/docs/html/userhtml19x.png
--- a/docs/html/userhtml1x.png
+++ b/docs/html/userhtml1x.png
--- a/docs/html/userhtml20x.png
+++ b/docs/html/userhtml20x.png
--- a/docs/html/userhtml21x.png
+++ b/docs/html/userhtml21x.png
--- a/docs/html/userhtml22x.png
+++ b/docs/html/userhtml22x.png
--- a/docs/html/userhtml23x.png
+++ b/docs/html/userhtml23x.png
--- a/docs/html/userhtml24x.png
+++ b/docs/html/userhtml24x.png
--- a/docs/html/userhtml25x.png
+++ b/docs/html/userhtml25x.png
--- a/docs/html/userhtml26x.png
+++ b/docs/html/userhtml26x.png
--- a/docs/html/userhtml27x.png
+++ b/docs/html/userhtml27x.png
--- a/docs/html/userhtml28x.png
+++ b/docs/html/userhtml28x.png
--- a/docs/html/userhtml29x.png
+++ b/docs/html/userhtml29x.png
--- a/docs/html/userhtml2x.png
+++ b/docs/html/userhtml2x.png
--- a/docs/html/userhtml30x.png
+++ b/docs/html/userhtml30x.png
--- a/docs/html/userhtml31x.png
+++ b/docs/html/userhtml31x.png
--- a/docs/html/userhtml32x.png
+++ b/docs/html/userhtml32x.png
--- a/docs/html/userhtml33x.png
+++ b/docs/html/userhtml33x.png
--- a/docs/html/userhtml34x.png
+++ b/docs/html/userhtml34x.png
--- a/docs/html/userhtml35x.png
+++ b/docs/html/userhtml35x.png
--- a/docs/html/userhtml36x.png
+++ b/docs/html/userhtml36x.png
--- a/docs/html/userhtml3x.png
+++ b/docs/html/userhtml3x.png
--- a/docs/html/userhtml4x.png
+++ b/docs/html/userhtml4x.png
--- a/Show More
+++ b/Show More