Merge branch 'cmake2' into cmake

2 months ago · ef71a32484
parent 5b81cbac12 61976812be
commit ef71a32484
375 changed files with 40725 additions and 19657 deletions
--- a/.VERSION
+++ b/.VERSION
@ -0,0 +1,12 @@
 $Format:%d%n%n$
 # Fall back version, probably last release:
 3.9.0
 # PSBLAS version file.
 #
 # Release archive created from commit:
 # $Format:%H %d$
 # $Format:Created on %ci by %cN, and$
 # $Format:signed by %GS using %GK.$
 # $Format:Signature status: %G?$
 $Format:%GG$
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,7 @@
 *.a
 *.o
 *.mod
 *.smod
 *~
 # header files generated
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -430,7 +430,11 @@ endif()
 #---------------------------------------
 # Link order, left to right:
 # cbind.a, util.a linsolve.a prec.a base.a
 include(${CMAKE_CURRENT_LIST_DIR}/base/CMakeLists.txt)
@ -515,6 +519,7 @@ target_include_directories(prec PUBLIC
  $<INSTALL_INTERFACE:modules>)
 target_link_libraries(prec PUBLIC base)
 <<<<<<< HEAD
 include(${CMAKE_CURRENT_LIST_DIR}/krylov/CMakeLists.txt)
 add_library(krylov ${krylov_source_files})
 set_target_properties(krylov
@ -528,6 +533,21 @@ target_include_directories(krylov PUBLIC
  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
  $<INSTALL_INTERFACE:modules>)
 target_link_libraries(krylov PUBLIC base prec)
 =======
 include(${CMAKE_CURRENT_LIST_DIR}/linsolve/CMakeLists.txt)
 add_library(linsolve ${linsolve_source_files})
 set_target_properties(linsolve
  PROPERTIES
  Fortran_MODULE_DIRECTORY "${CMAKE_BINARY_DIR}/modules"
  POSITION_INDEPENDENT_CODE TRUE
  OUTPUT_NAME psb_linsolve
  LINKER_LANGUAGE Fortran
  )
 target_include_directories(linsolve PUBLIC
  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>
  $<INSTALL_INTERFACE:modules>)
 target_link_libraries(linsolve PUBLIC base prec)
 >>>>>>> cmake2
 include(${CMAKE_CURRENT_LIST_DIR}/ext/CMakeLists.txt)
@ -644,7 +664,11 @@ target_include_directories(cbind PUBLIC
  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/modules>  # Path for building
  $<INSTALL_INTERFACE:modules>  # Path for installation
 )
 # Merge conflict resolved in favour of the cmake2 side: the solver library
 # target is named linsolve (formerly krylov).
 target_link_libraries(cbind PUBLIC base prec linsolve ext util)
 # Custom command to copy all header files
@ -706,13 +730,21 @@ if(MPI_FOUND)
  endif()
 # Link every library target against the MPI C and Fortran libraries.
 # Merge conflict resolved in favour of the cmake2 side (linsolve, formerly krylov).
  foreach(lib base prec linsolve ext util cbind)
    target_link_libraries(${lib} PUBLIC ${MPI_C_LIBRARIES} ${MPI_Fortran_LIBRARIES})
  endforeach()
 endif()
 # When OpenCoarrays was found, link every library target against its
 # static MPI-based coarray runtime.
 # Merge conflict resolved in favour of the cmake2 side (linsolve, formerly krylov).
 if(OpenCoarrays_FOUND)
  foreach(lib base prec linsolve ext util cbind)  #TODO: check if cbind goes here!
    target_link_libraries(${lib} PUBLIC OpenCoarrays::caf_mpi_static)
  endforeach()
 endif()
@ -731,7 +763,11 @@ install(DIRECTORY "${CMAKE_BINARY_DIR}/include" DESTINATION "include"
 install(DIRECTORY "${CMAKE_BINARY_DIR}/modules" DESTINATION "modules"
  FILES_MATCHING PATTERN "*.mod")
 # Install all library targets and record them in the project export set.
 # Merge conflict resolved in favour of the cmake2 side (linsolve, formerly krylov).
 install(TARGETS base prec linsolve ext util cbind
  EXPORT ${CMAKE_PROJECT_NAME}-targets
  DESTINATION "${CMAKE_INSTALL_LIBDIR}"
  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
--- a/Make.inc.in
+++ b/Make.inc.in
@ -67,6 +67,12 @@ UTILMODNAME=@UTILMODNAME@
 CBINDLIBNAME=libpsb_cbind.a
 OACCD=@OACCD@
 OACCLD=@OACCLD@
 FCOPENACC=@FCOPENACC@
 CCOPENACC=@CCOPENACC@
 CXXOPENACC=@CXXOPENACC@	
 CUDAD=@CUDAD@
 CUDALD=@CUDALD@
 LCUDA=@LCUDA@
@ -74,14 +80,18 @@ LCUDA=@LCUDA@
 SPGPU_LIBS=@SPGPU_LIBS@
 CUDA_DIR=@CUDA_DIR@
 CUDA_DEFINES=@CUDA_DEFINES@
 CUDA_INCLUDES=@CUDA_INCLUDES@
 CUDA_LIBS=@CUDA_LIBS@
 CUDA_VERSION=@CUDA_VERSION@
 CUDA_SHORT_VERSION=@CUDA_SHORT_VERSION@
 CUDA_DEFINES=@CUDA_DEFINES@
 FCUDEFINES=@FCUDEFINES@
 CCUDEFINES=@CCUDEFINES@
 CXXCUDEFINES=@CXXCUDEFINES@
 NVCC=@CUDA_NVCC@
 CUDEFINES=@CUDEFINES@
 .SUFFIXES: .cu
 .cu.o:
 	$(NVCC)   $(CINCLUDES) $(CDEFINES) $(CUDEFINES) -c $<
--- a/25
+++ b/25
@ -1,6 +1,6 @@
 include Make.inc
-all: dirs based precd kryld utild cbindd extd  $(CUDAD) libd
+all: dirs based precd linslvd utild cbindd extd  $(CUDAD) $(OACCD) libd
 	@echo "====================================="
 	@echo "PSBLAS libraries Compilation Successful."
@ -11,28 +11,31 @@ dirs:
 precd: based
 utild: based	
-kryld: precd 
+linslvd: precd 
 extd:  based
 cudad:  extd
-cbindd: based precd kryld utild 
+oaccd:  extd	
 cbindd: based precd linslvd utild 
-libd: based precd kryld utild cbindd extd $(CUDALD)
+libd: based precd linslvd utild cbindd extd $(CUDALD) $(OACCLD)
 	$(MAKE) -C base lib
 	$(MAKE) -C prec lib
-	$(MAKE) -C krylov lib
+	$(MAKE) -C linsolve lib
 	$(MAKE) -C util lib 
 	$(MAKE) -C cbind lib
 	$(MAKE) -C ext lib
 cudald:  cudad
 	$(MAKE) -C cuda lib
 oaccld:  oaccd
 	$(MAKE) -C openacc lib
 based:
 	$(MAKE) -C base objs
 precd:
 	$(MAKE) -C prec objs
-kryld:
+linslvd:
-	$(MAKE) -C krylov objs
+	$(MAKE) -C linsolve objs
 utild:
 	$(MAKE) -C util objs 
 cbindd:
@ -41,6 +44,8 @@ extd:   based
 	$(MAKE) -C ext objs
 cudad:   based extd
 	$(MAKE) -C cuda objs
 oaccd:   based extd
 	$(MAKE) -C openacc objs 
 install: all
@ -62,11 +67,12 @@ install: all
 clean: 
 	$(MAKE) -C base clean
 	$(MAKE) -C prec clean 
-	$(MAKE) -C krylov clean
+	$(MAKE) -C linsolve clean
 	$(MAKE) -C util clean
 	$(MAKE) -C cbind clean
 	$(MAKE) -C ext clean
 	$(MAKE) -C cuda clean
 	$(MAKE) -C openacc clean
 check: all
 	make check -C test/serial
@ -79,11 +85,12 @@ cleanlib:
 veryclean: cleanlib
 	cd base && $(MAKE) veryclean
 	cd prec && $(MAKE) veryclean 
-	cd krylov && $(MAKE) veryclean
+	cd linsolve && $(MAKE) veryclean
 	cd util && $(MAKE) veryclean
 	cd cbind && $(MAKE) veryclean
 	cd ext && $(MAKE) veryclean
 	cd cuda && $(MAKE) veryclean
 	cd openacc && $(MAKE) veryclean
 	cd test/fileread && $(MAKE) clean
 	cd test/pargen && $(MAKE) clean
 	cd test/util && $(MAKE) clean
--- a/README.md
+++ b/README.md
@ -1,37 +1,42 @@
-PSBLAS library, version 3.9
+# PSBLAS library, version 3.9
 ===========================
 The architecture of the Fortran 2003 sparse BLAS is described in:
 >S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix
 >Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No.
 4, 2012.
-The ideas are explored further with the paper:
+The PSBLAS library, developed with the aim to facilitate the parallelization of computationally intensive scientific applications, is designed to address parallel implementation of iterative solvers for sparse linear systems through the distributed memory paradigm. It includes routines for multiplying sparse matrices by dense matrices, solving block diagonal systems with triangular diagonal entries, preprocessing sparse matrices, and contains additional routines for dense matrix operations. The current implementation of PSBLAS addresses a distributed memory execution model operating with message passing.
 >V. Cardellini, S. Filippone and D. Rouson. Design Patterns for
 >sparse-matrix computations on hybrid CPU/GPU platforms, Scientific 
 >Programming, 22(2014), pp.1-19.
-Version 1.0 of the library is described in:
+The PSBLAS library version 3 is implemented in the Fortran 2008 programming language, with reuse and/or adaptation of existing Fortran 77 and Fortran 95 software, plus a handful of C routines. 
 >S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear
 >algebra computation on sparse matrices, ACM Trans. on Math. Software,
 >26(4), Dec. 2000, pp. 527-550.
 ## References
 UTILITIES
 ---------
 The `test/util` directory contains some utilities to convert to/from
 Harwell-Boeing and MatrixMarket file formats.
 The architecture, philosophy and implementation details of the library are contained in the following papers:
-DOCUMENTATION
+- The architecture of the Fortran 2003 sparse BLAS is described in:
-------------
+  >S. Filippone, A. Buttari. Object-Oriented Techniques for Sparse Matrix
-See docs/psblas-3.9.pdf; an HTML version of the same document is
+  >Computations in Fortran 2003, ACM Trans. on Math. Software, vol. 38, No.
-available in docs/html. Please consult the sample programs, especially
+  4, 2012.
-test/pargen/psb_[sd]_pde[23]d.f90
+
 - The ideas are explored further with the paper:
  >V. Cardellini, S. Filippone and D. Rouson. Design Patterns for
  >sparse-matrix computations on hybrid CPU/GPU platforms, Scientific 
  >Programming, 22(2014), pp.1-19.
 - Version 1.0 of the library is described in:
  >S. Filippone, M. Colajanni. PSBLAS: A library for parallel linear
  >algebra computation on sparse matrices, ACM Trans. on Math. Software,
  >26(4), Dec. 2000, pp. 527-550.
 - The software infrastructure changes required to accommodate the implementation of the
  Additive-Schwarz preconditioners available in [AMG4PSBLAS](https://github.com/sfilippone/amg4psblas/) are detailed in:
  > A. Buttari, P. D'Ambra, D. di Serafino, S. Filippone, Extending PSBLAS to build parallel Schwarz preconditioners, Applied Parallel Computing. State of the Art in Scientific Computing: 7th International Workshop, PARA 2004, LNCS 3732, 2006, pp. 593-602.
  > A. Buttari,  P. D'Ambra, D. Di Serafino, S. Filippone, 2LEV-D2P4: A package of high-performance preconditioners for scientific and engineering applications, Applicable Algebra in Engineering, Communications and Computing, 2007, 18(3), pp. 223-239.
  > P. D'Ambra, D. Di Serafino, S. Filippone, MLD2P4: A package of parallel algebraic multilevel domain decomposition preconditioners in Fortran 95 ACM Transactions on Mathematical Software, 2010, 37(3), 30
 PSBLAS is the backbone of the Parallel Sparse Computation Toolkit ([PSCToolkit](https://psctoolkit.github.io/)) suite of libraries. See the paper:
 > D’Ambra, P., Durastante, F., & Filippone, S. (2023). Parallel Sparse Computation Toolkit. Software Impacts, 15, 100463.
 ### Other Software credits 
 OTHER SOFTWARE CREDITS 
 ----------------------
 We originally included a modified implementation of some of the Sparker
 (serial sparse BLAS)  material; this has been completely rewritten, way
 beyond the intention(s) and responsibilities of the original developers.
@ -40,18 +45,8 @@ The main reference for the serial sparse BLAS is:
 >linear algebra subprograms for sparse matrices: a user level interface,
 >ACM Trans. Math. Softw., 23(3), 379-401, 1997.
-CUDA and GPU support
+## Installing
--------------------
+
 This version of PSBLAS incorporates into a single package three
 entities that were previously separated:
 1. PSBLAS     -- the base library
 2. PSBLAS-EXT -- a library providing additional storage formats
 3. SPGPU      -- a package of kernels for NVIDIA GPUs originally
   	      	 written by Davide Barbieri and Salvatore Filippone;
 		 see the license file cuda/License-spgpu.md
 INSTALLING
 ----------
 To compile and run our software you will need the following
 prerequisites (see also SERIAL below):
@ -71,7 +66,8 @@ prerequisites (see also SERIAL below):
   We use the C interface to AMD.
 5. If you have CUDA available, use
-   --with-cuda=<path>      to specify the CUDA toolkit location
+   --enable-cuda           to compile CUDA-enabled methods
   --with-cudadir=<path>   to specify the CUDA toolkit location
   --with-cudacc=XX,YY,ZZ  to specify a list of target CCs (compute
   			   capabilities) to compile the CUDA code for.
@ -79,21 +75,27 @@ The configure script will generate a Make.inc file suitable for building
 the library. The script is capable of recognizing the needed libraries
 with their default names; if they are in unusual places consider adding
 the paths with `--with-libs`, or explicitly specifying the names in
-`--with-blas`, etc. Please note that a common way for the configure script
+`--with-blas`, etc. 
-to fail is to specify inconsistent MPI vs. plain compilers, either
+
-directly or indirectly via environment variables; e.g. specifying the
+>[!CAUTION]
-Intel compiler with `FC=ifort` while at the same time having an 
+> Please note that a common way for the configure script
-`MPIFC=mpif90` which points to GNU Fortran. The best way to avoid this
+> to fail is to specify inconsistent MPI vs. plain compilers, either
-situation is (in our opinion) to use the environment modules package
+> directly or indirectly via environment variables; e.g. specifying the
-(see http://modules.sourceforge.net/), and load the relevant
+> Intel compiler with `FC=ifort` while at the same time having an 
-variables with (e.g.) 
+> `MPIFC=mpif90` which points to GNU Fortran. 
-```
+
-module load gnu46 openmpi
+>[!TIP]
-```
+> The best way to avoid this
-This will delegate to the modules setup to make sure that the version of
+> situation is (in our opinion) to use the environment modules package
-openmpi in use is the one compiled with the gnu46 compilers. After the
+> (see [http://modules.sourceforge.net/](http://modules.sourceforge.net/)), and load the relevant
-configure script has completed you can always tweak the Make.inc file
+> variables with (e.g.) 
-yourself. 
+> ```
 > module load gcc/13.2.0 openmpi/4.1.6
 > ```
 > This will delegate to the modules setup to make sure that the version of
 > openmpi in use is the one compiled with the gcc/13.2.0 compilers. After the
 > configure script has completed you can always tweak the Make.inc file
 > yourself.
 After you have Make.inc fixed,  run 
 ```
@ -105,58 +107,117 @@ install and the libraries will be installed under `/path/lib`, while the
 module files will be installed under `/path/modules`. The regular and
 experimental C interface header files are under `/path/include`.
-SERIAL
+### CUDA and GPU support
------
+
 This version of PSBLAS incorporates into a single package three
 entities that were previously separated:
 | Library |                    |
 |---------|--------------------|
 | PSBLAS  | the base library   |
 | PSBLAS-EXT | a library providing additional storage formats for matrices and vectors |
 | SPGPU      | a package of kernels for NVIDIA GPUs originally written by Davide Barbieri and Salvatore Filippone; see the license file [cuda/License-spgpu.md](cuda/License-spgpu.md) |
 ### OpenACC
 There is a highly experimental version of an OpenACC interface,
 you can access it by specifying
 ```bash
 --enable-openacc  --with-extraopenacc="-foffload=nvptx-none=-march=sm_70"
 ```
 where the argument to the extraopenacc option depends on the compiler
 you are using (the example shown here is relevant for the GNU
 compiler). 
 ### Serial
 Configuring with `--enable-serial` will provide a fake MPI stub library
 that enables running in pure serial mode; no MPI installation is needed
 in this case (but note that the fake MPI stubs are only guaranteed to
 cover what we use internally, it's not a complete replacement). 
-INTEGER SIZES
+### Integers
-------------
+
 We have two kinds of integers: IPK for local indices, and LPK for
 global indices. They can be specified independently at configure time,
 e.g.
 ```bash
 --with-ipk=4 --with-lpk=8
 ```
 which is asking for 4-bytes local indices, and 8-bytes global indices
 (this is the default). 
-TODO
+## Documentation
 ----
 Fix all reamining bugs. Bugs? We dont' have any ! ;-)
 Further information on installation and configuration can be found in the documentation.
 See [docs/psblas-3.9.pdf](docs/psblas-3.9.pdf); an HTML version of the same document is
 available in docs/html. Please consult the sample programs, especially
 - [test/pargen/psb_s_pde2d.F90](test/pargen/psb_s_pde2d.F90) [test/pargen/psb_d_pde2d.F90](test/pargen/psb_d_pde2d.F90)
 - [test/pargen/psb_s_pde3d.F90](test/pargen/psb_s_pde3d.F90) [test/pargen/psb_d_pde3d.F90](test/pargen/psb_d_pde3d.F90)
 which contain examples for the solution of linear systems obtained by the discretization of a generic second-order differential equation in two:
 ```math
 - a_1 \frac{\partial^2 u}{\partial x^2} 
 - a_2 \frac{\partial^2 u}{\partial y^2} 
 + b_1 \frac{\partial u}{\partial x} 
 + b_2 \frac{\partial u}{\partial y} 
 + c u = f
 ```
 or three
 ```math
 - a_1 \frac{\partial^2 u}{\partial x^2} 
 - a_2 \frac{\partial^2 u}{\partial y^2} 
 - a_3 \frac{\partial^2 u}{\partial z^2} 
 + b_1 \frac{\partial u}{\partial x} 
 + b_2 \frac{\partial u}{\partial y} 
 + b_3 \frac{\partial u}{\partial z} 
 + c u = f
 ```
 dimensions on the unit square/cube with Dirichlet boundary conditions.
-The PSBLAS team. 
+### Utilities
---------------
+
-Project lead:
+The [test/util](test/util) directory contains some utilities to convert to/from
-Salvatore Filippone
+Harwell-Boeing and MatrixMarket file formats.
-Contributors (roughly reverse cronological order):
+## TODO and bugs
-Dimitri    Walther
+- [ ] Improving OpenACC support
-Andea      Di Iorio
+- [ ] Improving OpenMP support
-Stefano    Petrilli
+- [X] Fix all remaining bugs. Bugs? We don't have any! 🤓
 Soren 	   Rasmussen
 Zaak       Beekman
 Ambra	   Abdullahi Hassan
 Pasqua	   D'Ambra
 Alfredo    Buttari
 Daniela    di Serafino
 Michele    Martone
 Michele    Colajanni
 Fabio      Cerioni
 Stefano    Maiolatesi
 Dario      Pascucci
 > [!NOTE]
 > To report bugs 🐛 or issues ❓ please use the [GitHub issue system](https://github.com/sfilippone/psblas3/issues).
-RELATED SOFTWARE
+
----------------
+## The PSBLAS team. 
 **Project lead:**
 Salvatore Filippone
 **Contributors** (_roughly reverse chronological order_):
 - Theophane  Loloum
 - Fabio      Durastante
 - Dimitri    Walther
 - Andea      Di Iorio
 - Stefano    Petrilli
 - Soren 	   Rasmussen
 - Zaak       Beekman
 - Ambra	   Abdullahi Hassan
 - Pasqua	   D'Ambra
 - Alfredo    Buttari
 - Daniela    di Serafino
 - Michele    Martone
 - Michele    Colajanni
 - Fabio      Cerioni
 - Stefano    Maiolatesi
 - Dario      Pascucci
 ## RELATED SOFTWARE
 If you are looking for more sophisticated preconditioners, you may be
 interested in the package AMG4PSBLAS from
-<http://github.com/sfilippone/amg4psblas>
+<http://github.com/sfilippone/amg4psblas> and the whole [PSCToolkit suite](https://psctoolkit.github.io/).
 Contact: <https://github.com/sfilippone/psblas3>
--- a/base/modules/Makefile
+++ b/base/modules/Makefile
@ -10,13 +10,14 @@ BASIC_MODS= psb_const_mod.o psb_cbind_const_mod.o psb_error_mod.o psb_realloc_mo
 	auxil/psb_z_realloc_mod.o
 COMMINT= penv/psi_penv_mod.o \
-	 penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o \
+	 penv/psi_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_i2_p2p_mod.o \
 	 penv/psi_e_p2p_mod.o \
 	 penv/psi_s_p2p_mod.o \
 	 penv/psi_d_p2p_mod.o \
 	 penv/psi_c_p2p_mod.o \
 	 penv/psi_z_p2p_mod.o \
 	 penv/psi_collective_mod.o \
 	 penv/psi_i2_collective_mod.o \
 	 penv/psi_e_collective_mod.o \
 	 penv/psi_m_collective_mod.o \
 	 penv/psi_s_collective_mod.o \
@ -149,23 +150,25 @@ psb_realloc_mod.o: auxil/psb_m_realloc_mod.o \
 	auxil/psb_c_realloc_mod.o \
 	auxil/psb_z_realloc_mod.o
-penv/psi_p2p_mod.o: penv/psi_m_p2p_mod.o \
+penv/psi_p2p_mod.o:  penv/psi_i2_p2p_mod.o \
-	 penv/psi_e_p2p_mod.o \
+	penv/psi_m_p2p_mod.o \
-	 penv/psi_s_p2p_mod.o \
+	penv/psi_e_p2p_mod.o \
-	 penv/psi_d_p2p_mod.o \
+	penv/psi_s_p2p_mod.o \
-	 penv/psi_c_p2p_mod.o \
+	penv/psi_d_p2p_mod.o \
-	 penv/psi_z_p2p_mod.o
+	penv/psi_c_p2p_mod.o \
-penv/psi_collective_mod.o: penv/psi_e_collective_mod.o \
+	penv/psi_z_p2p_mod.o
-	 penv/psi_m_collective_mod.o \
+penv/psi_collective_mod.o: penv/psi_i2_collective_mod.o \
-	 penv/psi_s_collective_mod.o \
+	penv/psi_e_collective_mod.o \
-	 penv/psi_d_collective_mod.o \
+	penv/psi_m_collective_mod.o \
-	 penv/psi_c_collective_mod.o \
+	penv/psi_s_collective_mod.o \
-	 penv/psi_z_collective_mod.o 
+	penv/psi_d_collective_mod.o \
-
+	penv/psi_c_collective_mod.o \
-penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o  penv/psi_s_p2p_mod.o \
+	penv/psi_z_collective_mod.o 
 penv/psi_i2_p2p_mod.o penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o  penv/psi_s_p2p_mod.o \
 penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o  penv/psi_z_p2p_mod.o: penv/psi_penv_mod.o 
-penv/psi_e_collective_mod.o  penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
+penv/psi_i2_collective_mod.o  penv/psi_e_collective_mod.o  penv/psi_m_collective_mod.o penv/psi_s_collective_mod.o \
 penv/psi_d_collective_mod.o  penv/psi_c_collective_mod.o penv/psi_z_collective_mod.o: penv/psi_penv_mod.o \
 	penv/psi_m_p2p_mod.o penv/psi_e_p2p_mod.o  penv/psi_s_p2p_mod.o \
 	penv/psi_d_p2p_mod.o penv/psi_c_p2p_mod.o  penv/psi_z_p2p_mod.o
--- a/base/modules/desc/psb_desc_const_mod.f90
+++ b/base/modules/desc/psb_desc_const_mod.f90
@ -35,7 +35,7 @@
 !    Auxiliary module for descriptor: constant values. 
 !
 module psb_desc_const_mod
-  use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_
+  use psb_const_mod, only : psb_ipk_, psb_lpk_, psb_mpk_, psb_epk_, psb_i2pk_
  !
  !     Communication, prolongation & restriction
  !
--- a/base/modules/penv/psi_penv_mod.F90
+++ b/base/modules/penv/psi_penv_mod.F90
@ -167,6 +167,7 @@ module psi_penv_mod
 #else 
  integer(psb_mpk_), save :: mpi_i2amx_op, mpi_i2amn_op
  integer(psb_mpk_), save :: mpi_iamx_op, mpi_iamn_op
  integer(psb_mpk_), save :: mpi_mamx_op, mpi_mamn_op
  integer(psb_mpk_), save :: mpi_eamx_op, mpi_eamn_op
@ -181,6 +182,7 @@ module psi_penv_mod
 #endif
  private :: psi_get_sizes,  psi_register_mpi_extras
  private :: psi_i2amx_op, psi_i2amn_op
  private :: psi_iamx_op, psi_iamn_op 
  private :: psi_mamx_op, psi_mamn_op 
  private :: psi_eamx_op, psi_eamn_op 
@ -773,6 +775,8 @@ contains
 #if defined(SERIAL_MPI)
 #else 
    if (info == 0) call mpi_op_create(psi_i2amx_op,.true.,mpi_i2amx_op,info)
    if (info == 0) call mpi_op_create(psi_i2amn_op,.true.,mpi_i2amn_op,info)
    if (info == 0) call mpi_op_create(psi_mamx_op,.true.,mpi_mamx_op,info)
    if (info == 0) call mpi_op_create(psi_mamn_op,.true.,mpi_mamn_op,info)
    if (info == 0) call mpi_op_create(psi_eamx_op,.true.,mpi_eamx_op,info)
@ -997,6 +1001,8 @@ contains
           & call mpi_comm_Free(ctxt%ctxt,info)
    end if
    if (close_) then 
      if (info == 0) call mpi_op_free(mpi_i2amx_op,info)
      if (info == 0) call mpi_op_free(mpi_i2amn_op,info)
      if (info == 0) call mpi_op_free(mpi_mamx_op,info)
      if (info == 0) call mpi_op_free(mpi_mamn_op,info)
      if (info == 0) call mpi_op_free(mpi_eamx_op,info)
@ -1188,6 +1194,26 @@ contains
  ! Note: len & type are always default integer.
  !
  ! !!!!!!!!!!!!!!!!!!!!!!
  ! Element-wise "absolute maximum" combiner for psb_i2pk_ integers.
  ! For each of the len entries, outv(i) is overwritten with inv(i) when
  ! inv(i) has the strictly larger absolute value; otherwise outv(i) is kept.
  ! The signed value is stored, not its magnitude.
  ! This routine is passed to mpi_op_create to build the mpi_i2amx_op handle.
  ! The type argument is not referenced in the body.
  subroutine psi_i2amx_op(inv, outv,len,type) 
    integer(psb_i2pk_) :: inv(len), outv(len)
    integer(psb_mpk_) :: len,type
    integer(psb_mpk_) :: i
    do i=1, len
      ! Strict comparison: on equal magnitudes the current outv(i) is left as is
      if (abs(inv(i)) > abs(outv(i))) outv(i) = inv(i)
    end do
  end subroutine psi_i2amx_op
  ! Element-wise "absolute minimum" combiner for psb_i2pk_ integers.
  ! For each of the len entries, outv(i) is overwritten with inv(i) when
  ! inv(i) has the strictly smaller absolute value; otherwise outv(i) is kept.
  ! The signed value is stored, not its magnitude.
  ! This routine is passed to mpi_op_create to build the mpi_i2amn_op handle.
  ! The type argument is not referenced in the body.
  subroutine psi_i2amn_op(inv, outv,len,type) 
    integer(psb_i2pk_) :: inv(len), outv(len)
    integer(psb_mpk_) :: len,type
    integer(psb_mpk_) :: i
    do i=1, len
      ! Strict comparison: on equal magnitudes the current outv(i) is left as is
      if (abs(inv(i)) < abs(outv(i))) outv(i) = inv(i)
    end do
  end subroutine psi_i2amn_op
  subroutine psi_mamx_op(inv, outv,len,type) 
    integer(psb_mpk_) :: inv(len), outv(len)
    integer(psb_mpk_) :: len,type
--- a/base/serial/impl/psb_c_coo_impl.F90
+++ b/base/serial/impl/psb_c_coo_impl.F90
@ -595,12 +595,13 @@ subroutine  psb_c_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_ipk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= czero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine  psb_lc_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_lpk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= czero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= czero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_c_csc_impl.F90
+++ b/base/serial/impl/psb_c_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_c_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= czero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4313,7 +4314,7 @@ subroutine  psb_lc_csc_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nc
  integer(psb_lpk_), allocatable :: ilcp(:)
-
+  
  info = 0
  call a%sync()
  nc   = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine  psb_lc_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= czero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_c_csr_impl.F90
+++ b/base/serial/impl/psb_c_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_c_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= czero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6552,7 +6553,7 @@ subroutine  psb_lc_csr_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nr
  integer(psb_lpk_), allocatable :: ilrp(:)
-
+  
  info = 0
  call a%sync()
  nr   = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine  psb_lc_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= czero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= czero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/impl/psb_d_coo_impl.F90
+++ b/base/serial/impl/psb_d_coo_impl.F90
@ -595,12 +595,13 @@ subroutine  psb_d_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_ipk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= dzero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine  psb_ld_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_lpk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= dzero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= dzero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_d_csc_impl.F90
+++ b/base/serial/impl/psb_d_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_d_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4313,7 +4314,7 @@ subroutine  psb_ld_csc_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nc
  integer(psb_lpk_), allocatable :: ilcp(:)
-
+  
  info = 0
  call a%sync()
  nc   = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine  psb_ld_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= dzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_d_csr_impl.F90
+++ b/base/serial/impl/psb_d_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_d_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= dzero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6552,7 +6553,7 @@ subroutine  psb_ld_csr_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nr
  integer(psb_lpk_), allocatable :: ilrp(:)
-
+  
  info = 0
  call a%sync()
  nr   = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine  psb_ld_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= dzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= dzero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/impl/psb_s_coo_impl.F90
+++ b/base/serial/impl/psb_s_coo_impl.F90
@ -595,12 +595,13 @@ subroutine  psb_s_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_ipk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= szero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine  psb_ls_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_lpk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= szero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= szero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_s_csc_impl.F90
+++ b/base/serial/impl/psb_s_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_s_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= szero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4313,7 +4314,7 @@ subroutine  psb_ls_csc_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nc
  integer(psb_lpk_), allocatable :: ilcp(:)
-
+  
  info = 0
  call a%sync()
  nc   = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine  psb_ls_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= szero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_s_csr_impl.F90
+++ b/base/serial/impl/psb_s_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_s_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= szero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6552,7 +6553,7 @@ subroutine  psb_ls_csr_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nr
  integer(psb_lpk_), allocatable :: ilrp(:)
-
+  
  info = 0
  call a%sync()
  nr   = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine  psb_ls_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= szero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= szero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/impl/psb_z_coo_impl.F90
+++ b/base/serial/impl/psb_z_coo_impl.F90
@ -595,12 +595,13 @@ subroutine  psb_z_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_ipk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= zzero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
@ -5926,12 +5927,13 @@ subroutine  psb_lz_coo_clean_zeros(a, info)
  integer(psb_ipk_), intent(out) :: info
  !
  integer(psb_lpk_) :: i,j,k, nzin
-
+  
  info = 0
  nzin = a%get_nzeros()
  j = 0
  do i=1, nzin
-    if (a%val(i) /= zzero) then
+    ! Always keep the diagonal, even if numerically zero
    if ((a%val(i) /= zzero).or.(a%ia(i) == a%ja(i))) then 
      j = j + 1
      a%val(j) = a%val(i)
      a%ia(j)  = a%ia(i)
--- a/base/serial/impl/psb_z_csc_impl.F90
+++ b/base/serial/impl/psb_z_csc_impl.F90
@ -2421,7 +2421,8 @@ subroutine  psb_z_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
@ -4313,7 +4314,7 @@ subroutine  psb_lz_csc_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nc
  integer(psb_lpk_), allocatable :: ilcp(:)
-
+  
  info = 0
  call a%sync()
  nc   = a%get_ncols()
@ -4322,7 +4323,8 @@ subroutine  psb_lz_csc_clean_zeros(a, info)
  j        = a%icp(1)
  do i=1, nc
    do k = ilcp(i), ilcp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= zzero).or.(i == a%ia(k))) then
        a%val(j) = a%val(k)
        a%ia(j)  = a%ia(k)
        j = j + 1
--- a/base/serial/impl/psb_z_csr_impl.F90
+++ b/base/serial/impl/psb_z_csr_impl.F90
@ -3642,7 +3642,8 @@ subroutine  psb_z_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= zzero).or.(i == a%ja(k))) then
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
@ -6552,7 +6553,7 @@ subroutine  psb_lz_csr_clean_zeros(a, info)
  !
  integer(psb_lpk_) :: i, j, k, nr
  integer(psb_lpk_), allocatable :: ilrp(:)
-
+  
  info = 0
  call a%sync()
  nr   = a%get_nrows()
@ -6561,7 +6562,8 @@ subroutine  psb_lz_csr_clean_zeros(a, info)
  j        = a%irp(1)
  do i=1, nr
    do k = ilrp(i), ilrp(i+1) -1
-      if (a%val(k) /= zzero) then
+      ! Always keep the diagonal, even if numerically zero
      if ((a%val(k) /= zzero).or.(i == a%ja(k))) then 
        a%val(j) = a%val(k)
        a%ja(j)  = a%ja(k)
        j = j + 1
--- a/base/serial/psb_cgeprt.f90
+++ b/base/serial/psb_cgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_scsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:
@ -131,6 +131,7 @@ subroutine psb_cgeprt2(iout,a,head)
  ncol = size(a,2) 
  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
  do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_cgeprt1(iout,a,head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
--- a/base/serial/psb_dgeprt.f90
+++ b/base/serial/psb_dgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_dcsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:
@ -124,13 +124,14 @@ subroutine psb_dgeprt2(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
+  ! Real-valued array (one value per entry is written below): the
+  ! MatrixMarket banner field must stay 'real', not 'complex'.
  write(iout,'(a)') '%%MatrixMarket matrix array real general'
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = size(a,2) 
  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
  do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_dgeprt1(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
+  ! Real-valued vector (one value per entry is written below): the
+  ! MatrixMarket banner field must stay 'real', not 'complex'.
  write(iout,'(a)') '%%MatrixMarket matrix array real general'
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
--- a/base/serial/psb_sgeprt.f90
+++ b/base/serial/psb_sgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_scsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:
@ -124,13 +124,14 @@ subroutine psb_sgeprt2(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
+  ! Real-valued array (one value per entry is written below): the
+  ! MatrixMarket banner field must stay 'real', not 'complex'.
  write(iout,'(a)') '%%MatrixMarket matrix array real general'
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = size(a,2) 
  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
  do i=1,nrow
@ -156,12 +157,12 @@ subroutine psb_sgeprt1(iout,a,head)
  character(len=80)              :: frmtv 
  integer(psb_ipk_) :: irs,ics,i,j, nmx, ni, nrow, ncol
+  ! Real-valued vector (one value per entry is written below): the
+  ! MatrixMarket banner field must stay 'real', not 'complex'.
  write(iout,'(a)') '%%MatrixMarket matrix array real general'
  write(iout,'(a)') '% '//trim(head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'(es26.18,1x))'
--- a/base/serial/psb_zgeprt.f90
+++ b/base/serial/psb_zgeprt.f90
@ -29,7 +29,7 @@
 !    POSSIBILITY OF SUCH DAMAGE.
 !   
 !    
-! File:  psb_scsprt.f90 
+! File:  psb_geprt.f90 
 ! Subroutine: 
 ! Arguments:
@ -131,6 +131,7 @@ subroutine psb_zgeprt2(iout,a,head)
  ncol = size(a,2) 
  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
  do i=1,nrow
@ -161,7 +162,7 @@ subroutine psb_zgeprt1(iout,a,head)
  write(iout,'(a)') '% '
  nrow = size(a,1) 
  ncol = 1
-  write(iout,*) nrow
+  write(iout,*) nrow,ncol
  write(frmtv,'(a,i3.3,a)') '(',ncol,'2(es26.18,1x))'
--- a/cbind/krylov/psb_ckrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_ckrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/krylov/psb_dkrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_dkrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/krylov/psb_skrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_skrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/krylov/psb_zkrylov_cbind_mod.f90
+++ b/cbind/krylov/psb_zkrylov_cbind_mod.f90
@ -8,7 +8,7 @@ contains
       & ah,ph,bh,xh,cdh,options) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
@ -33,7 +33,7 @@ contains
       & ah,ph,bh,xh,eps,cdh,itmax,iter,err,itrace,irst,istop) bind(c) result(res)
    use psb_base_mod
    use psb_prec_mod
-    use psb_krylov_mod
+    use psb_linsolve_mod
    use psb_objhandle_mod
    use psb_prec_cbind_mod
    use psb_base_string_cbind_mod
--- a/cbind/test/pargen/Makefile
+++ b/cbind/test/pargen/Makefile
@ -9,7 +9,7 @@ FINCLUDES=$(FMFLAG). $(FMFLAG)$(HERE) $(FMFLAG)$(MODDIR)
 CINCLUDES=-I. -I$(HERE) -I$(INCLUDEDIR)
 PSBC_LIBS= -L$(LIBDIR) -lpsb_cbind
-PSB_LIBS=-lpsb_util -lpsb_krylov -lpsb_prec -lpsb_base -L$(LIBDIR)
+PSB_LIBS=-lpsb_util -lpsb_linsolve -lpsb_prec -lpsb_base -L$(LIBDIR)
 #
 # Compilers and such
--- a/config/ax_c_openacc.m4
+++ b/config/ax_c_openacc.m4
@ -0,0 +1,104 @@
 # AX_C_OPENACC
 # ------------
 # Check which options need to be passed to the C compiler to support OpenACC.
 # Set the OPENACC_CFLAGS variable to these
 # options.
 # The options are necessary at compile time (so the #pragmas are understood)
 # and at link time (so the appropriate library is linked with).
 # This macro takes care to not produce redundant options if $CC $CFLAGS already
 # supports Openacc.
 #
 # For each candidate option, we do a compile test first, then a link test;
 # if the compile test succeeds but the link test fails, that means we have
 # found the correct option but it doesn't work because the libraries are
 # broken.  (This can happen, for instance, with SunPRO C and a bad combination
 # of operating system patches.)
 #
 # Several of the options in our candidate list can be misinterpreted by
 # compilers that don't use them to activate Openacc support; for example,
 # many compilers understand "-openacc" to mean "write output to a file
 # named 'penmp'" rather than "enable Openacc".  We can't completely avoid
 # the possibility of clobbering files named 'penmp' or 'mp' in configure's
 # working directory; therefore, this macro will bomb out if any such file
 # already exists when it's invoked.
 # AX_C_OPENACC probes the C compiler for the flag that enables OpenACC.
 # It honours --disable-openacc.  The outcome is cached in
 # ax_cv_prog_c_openacc, which ends up holding either a compiler flag,
 # 'none needed' or 'unsupported'.  OPENACC_CFLAGS receives the flag and is
 # left empty when no flag is needed or OpenACC is unsupported.
 AC_DEFUN([AX_C_OPENACC],
 [AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
 [AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
 [
  dnl Spell the variable name out: this line is expanded before AC_LANG_PUSH,
  dnl so a language-dependent name would follow the language of the caller.
  OPENACC_CFLAGS=
  if test "$enable_openacc" != no; then
      AC_LANG_PUSH([C])
      AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate flags, tried in the order of the loop below:
      dnl   (on by default)                 ''
      dnl   GCC                             -fopenacc
      dnl   other compilers (unverified)    -openacc
      dnl   NVIDIA HPC SDK / PGI            -acc
      for ac_option in '' -fopenacc -openacc -acc; do

        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
  int main (void) { acc_init (0); return 0;}
 ],
          [AC_LINK_IFELSE([
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
 int main (void) { acc_init (0); return 0;}
 ],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS

        dnl A compile failure leaves the value at 'not found', so the next
        dnl candidate is tried.  Compile success ends the search, whether or
        dnl not the link step worked.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl Normalise the cached value.  This must update the same ax_cv_
      dnl variable that is tested below, otherwise 'not found' would be
      dnl copied into the flags variable as if it were a compiler option.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
   AC_LANG_POP([C])
  fi
 ])
 # _AC_OPENACC_SAFE_WD
 # ------------------
 # AC_REQUIREd by AC_OPENACC.  Checks both at autoconf time and at
 # configure time for files that AC_OPENACC clobbers.
 # _AX_OPENACC_SAFE_WD is the guard macro AC_REQUIREd by AX_C_OPENACC,
 # AX_CXX_OPENACC and AX_FC_OPENACC.  It refuses to continue when a file
 # named 'penmp' or 'mp' already exists:
 #   - at autoconf time, m4_syscmd runs the test in the directory where
 #     autoconf is invoked and m4_fatal aborts if either file is present;
 #   - at configure time, the emitted shell test calls AC_MSG_ERROR.
 # NOTE(review): the probe loops try -fopenacc, -openacc and -acc.  A compiler
 # that misreads -openacc as an output option would presumably create
 # 'penacc' rather than 'penmp' -- confirm which names should be guarded.
 AC_DEFUN([_AX_OPENACC_SAFE_WD],
 [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
 [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
  [AX_OPENACC clobbers files named 'mp' and 'penmp'.
   To use AX_OPENACC you must not have either of these files
   at the top level of your source tree.]))])]dnl
 [if test -e penmp || test -e mp; then
  AC_MSG_ERROR(m4_normalize(
    [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
     Aborting configure because one of these files already exists.]))
 fi])
--- a/config/ax_cxx_openacc.m4
+++ b/config/ax_cxx_openacc.m4
@ -0,0 +1,104 @@
 # AX_CXX_OPENACC
 # --------------
 # Check which options need to be passed to the C++ compiler to support OpenACC.
 # Set the OPENACC_CXXFLAGS variable to these
 # options.
 # The options are necessary at compile time (so the #pragmas are understood)
 # and at link time (so the appropriate library is linked with).
 # This macro takes care to not produce redundant options if $CC $CFLAGS already
 # supports Openacc.
 #
 # For each candidate option, we do a compile test first, then a link test;
 # if the compile test succeeds but the link test fails, that means we have
 # found the correct option but it doesn't work because the libraries are
 # broken.  (This can happen, for instance, with SunPRO C and a bad combination
 # of operating system patches.)
 #
 # Several of the options in our candidate list can be misinterpreted by
 # compilers that don't use them to activate Openacc support; for example,
 # many compilers understand "-openacc" to mean "write output to a file
 # named 'penmp'" rather than "enable Openacc".  We can't completely avoid
 # the possibility of clobbering files named 'penmp' or 'mp' in configure's
 # working directory; therefore, this macro will bomb out if any such file
 # already exists when it's invoked.
 # AX_CXX_OPENACC probes the C++ compiler for the flag that enables OpenACC.
 # It honours --disable-openacc.  The outcome is cached in
 # ax_cv_prog_cxx_openacc, which ends up holding either a compiler flag,
 # 'none needed' or 'unsupported'.  OPENACC_CXXFLAGS receives the flag and is
 # left empty when no flag is needed or OpenACC is unsupported.
 AC_DEFUN([AX_CXX_OPENACC],
 [AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
 [AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
 [
  dnl Spell the variable name out: this line is expanded before AC_LANG_PUSH,
  dnl so a language-dependent name would follow the language of the caller.
  OPENACC_CXXFLAGS=
  if test "$enable_openacc" != no; then
      AC_LANG_PUSH([C++])
      AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate flags, tried in the order of the loop below:
      dnl   (on by default)                 ''
      dnl   GCC                             -fopenacc
      dnl   other compilers (unverified)    -openacc
      dnl   NVIDIA HPC SDK / PGI            -acc
      for ac_option in '' -fopenacc -openacc -acc; do

        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
 int main (void) { acc_init (acc_get_device_type()); return 0;}
 ],
          [AC_LINK_IFELSE([
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
 int main (void) { acc_init (acc_get_device_type()); return 0;}
 ],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS

        dnl A compile failure leaves the value at 'not found', so the next
        dnl candidate is tried.  Compile success ends the search, whether or
        dnl not the link step worked.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found'; then
          break
        fi
      done
      dnl Normalise the cached value.  This must update the same ax_cv_
      dnl variable that is tested below, otherwise 'not found' would be
      dnl copied into the flags variable as if it were a compiler option.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
   AC_LANG_POP([C++])
  fi
 ])
 dnl _AC_OPENACC_SAFE_WD
 dnl ------------------
 dnl AC_REQUIREd by AC_OPENACC.  Checks both at autoconf time and at
 dnl configure time for files that AC_OPENACC clobbers.
 dnl AC_DEFUN([_AX_OPENACC_SAFE_WD],
 dnl [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
 dnl [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
 dnl   [AX_OPENACC clobbers files named 'mp' and 'penmp'.
 dnl    To use AX_OPENACC you must not have either of these files
 dnl    at the top level of your source tree.]))])]dnl
 dnl [if test -e penmp || test -e mp; then
 dnl   AC_MSG_ERROR(m4_normalize(
 dnl     [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
 dnl      Aborting configure because one of these files already exists.]))
 dnl fi])
--- a/config/ax_fc_openacc.m4
+++ b/config/ax_fc_openacc.m4
@ -0,0 +1,108 @@
 # AX_FC_OPENACC
 # -------------
 # Check which options need to be passed to the Fortran compiler to support OpenACC.
 # Set the OPENACC_FCFLAGS variable to these
 # options.
 # The options are necessary at compile time (so the #pragmas are understood)
 # and at link time (so the appropriate library is linked with).
 # This macro takes care to not produce redundant options if $CC $CFLAGS already
 # supports Openacc.
 #
 # For each candidate option, we do a compile test first, then a link test;
 # if the compile test succeeds but the link test fails, that means we have
 # found the correct option but it doesn't work because the libraries are
 # broken.  (This can happen, for instance, with SunPRO C and a bad combination
 # of operating system patches.)
 #
 # Several of the options in our candidate list can be misinterpreted by
 # compilers that don't use them to activate Openacc support; for example,
 # many compilers understand "-openacc" to mean "write output to a file
 # named 'penmp'" rather than "enable Openacc".  We can't completely avoid
 # the possibility of clobbering files named 'penmp' or 'mp' in configure's
 # working directory; therefore, this macro will bomb out if any such file
 # already exists when it's invoked.
 # AX_FC_OPENACC probes the Fortran compiler for the flag that enables
 # OpenACC.  It honours --disable-openacc.  The outcome is cached in
 # ax_cv_prog_fc_openacc, which ends up holding either a compiler flag,
 # 'none needed' or 'unsupported'.  OPENACC_FCFLAGS receives the flag and is
 # left empty when no flag is needed or OpenACC is unsupported.
 AC_DEFUN([AX_FC_OPENACC],
 [AC_REQUIRE([_AX_OPENACC_SAFE_WD])]dnl
 [AC_ARG_ENABLE([openacc],
   [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])]dnl
 [
  dnl Spell the variable name out: this line is expanded before AC_LANG_PUSH,
  dnl so a language-dependent name would follow the language of the caller.
  OPENACC_FCFLAGS=
  if test "$enable_openacc" != no; then
      AC_LANG_PUSH([Fortran])
      AC_CACHE_CHECK([for $[]_AC_CC[] option to support Openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc],
      [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='not found'
      dnl Candidate flags, tried in the order of the loop below:
      dnl   (on by default)                 ''
      dnl   GCC                             -fopenacc
      dnl   other compilers (unverified)    -openacc
      dnl   NVIDIA HPC SDK / PGI            -acc
      for ac_option in '' -fopenacc -openacc -acc; do

        ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS
        _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option"
        AC_COMPILE_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
 ],
          [AC_LINK_IFELSE([
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
 ],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc=$ac_option],
            [ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'])])
        _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS

        dnl Stop only once a candidate both compiled and linked.  The test
        dnl program has no preprocessor guard, so it may compile without any
        dnl OpenACC flag and then fail to link; in that case the value is
        dnl 'unsupported' and the remaining candidates must still be tried.
        dnl A compile failure leaves 'not found' and must not end the search
        dnl either.
        if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'not found' && \
           test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported'; then
          break
        fi
      done
      dnl Normalise the cached value.  This must update the same ax_cv_
      dnl variable that is tested below, otherwise 'not found' would be
      dnl copied into the flags variable as if it were a compiler option.
      if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = 'not found'; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='unsupported'
      elif test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" = ''; then
        ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc='none needed'
      fi
      dnl _AX_OPENACC_SAFE_WD checked that these files did not exist before we
      dnl started probing for Openacc support, so if they exist now, they were
      dnl created by the probe loop and it's safe to delete them.
      rm -f penmp mp])
    if test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'unsupported' && \
       test "$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc" != 'none needed'; then
      OPENACC_[]_AC_LANG_PREFIX[]FLAGS="$ax_cv_prog_[]_AC_LANG_ABBREV[]_openacc"
    fi
   AC_LANG_POP([Fortran])
  fi
 ])
 # _AC_OPENACC_SAFE_WD
 # ------------------
 # AC_REQUIREd by AC_OPENACC.  Checks both at autoconf time and at
 # configure time for files that AC_OPENACC clobbers.
 # _AX_OPENACC_SAFE_WD is the guard macro AC_REQUIREd by AX_FC_OPENACC.
 # It refuses to continue when a file named 'penmp' or 'mp' already exists:
 #   - at autoconf time, m4_syscmd runs the test in the directory where
 #     autoconf is invoked and m4_fatal aborts if either file is present;
 #   - at configure time, the emitted shell test calls AC_MSG_ERROR.
 # NOTE(review): config/ax_c_openacc.m4 carries an identical definition of
 # this macro, while config/ax_cxx_openacc.m4 has its copy commented out --
 # confirm that defining it in two files is intended.
 AC_DEFUN([_AX_OPENACC_SAFE_WD],
 [m4_syscmd([test ! -e penmp && test ! -e mp])]dnl
 [m4_if(sysval, [0], [], [m4_fatal(m4_normalize(
  [AX_OPENACC clobbers files named 'mp' and 'penmp'.
   To use AX_OPENACC you must not have either of these files
   at the top level of your source tree.]))])]dnl
 [if test -e penmp || test -e mp; then
  AC_MSG_ERROR(m4_normalize(
    [AX@&t@_OPENACC clobbers files named 'mp' and 'penmp'.
     Aborting configure because one of these files already exists.]))
 fi])
--- a/config/pac.m4
+++ b/config/pac.m4
@ -2157,6 +2157,31 @@ CPPFLAGS="$SAVE_CPPFLAGS"
 dnl @synopsis PAC_ARG_CUDA
 dnl
 dnl Test for --enable-cuda
 dnl 
 dnl 
 dnl
 dnl Example use:
 dnl
 dnl
 dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
 dnl
 dnl PAC_ARG_CUDA records the --enable-cuda / --disable-cuda choice of the
 dnl user in pac_cv_cuda.  When the option is not given at all, pac_cv_cuda
 dnl is left untouched.
 AC_DEFUN([PAC_ARG_CUDA],
 [AC_MSG_CHECKING([whether we want cuda ])
 AC_ARG_ENABLE(cuda,
 AS_HELP_STRING([--enable-cuda], 
 [Specify whether to enable cuda. ]),
 [
 dnl The action runs for --disable-cuda and --enable-cuda=no as well, with
 dnl enableval set to no, so the value has to be inspected.
 if test "x$enableval" != "xno"; then
   pac_cv_cuda="yes";
 else
   pac_cv_cuda="no";
 fi
 ]
 	     )
 dnl Close the message opened by AC_MSG_CHECKING above.
 if test "x$pac_cv_cuda" = "xyes"; then
   AC_MSG_RESULT([yes.])
 else
   AC_MSG_RESULT([no.])
 fi
 ]
 )
 dnl @synopsis PAC_CHECK_CUDA
 dnl
@ -2173,7 +2198,7 @@ dnl
 dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
 dnl
 AC_DEFUN(PAC_CHECK_CUDA,
-[AC_ARG_WITH(cuda, AC_HELP_STRING([--with-cuda=DIR], [Specify the CUDA install directory.]),
+[AC_ARG_WITH(cudadir, AC_HELP_STRING([--with-cudadir=DIR], [Specify the CUDA install directory.]),
        [pac_cv_cuda_dir=$withval],
        [pac_cv_cuda_dir=''])
@ -2234,56 +2259,6 @@ AC_HELP_STRING([--with-cudacc], [A comma-separated list of CCs to compile to, fo
 [pac_cv_cudacc=''])
 ])
 dnl PAC_ARG_WITH_LIBRSB handles --with-librsb=DIR.
 dnl The directory is stored in pac_cv_librsb_dir (empty when the option is
 dnl not given).  When it is non-empty, -L is prepended to LIBS and
 dnl RSB_INCLUDES / RSB_LIBDIR are set from it.  The header rsb.h is then
 dnl looked up inside that directory; on success RSB_LIBS, RSBLIBNAME, the
 dnl LIBRSB_* variables and LRSB are set and -DHAVE_RSB is prepended to
 dnl FDEFINES and CDEFINES.  On failure RSB_INCLUDES is cleared.
 dnl LIBS and CPPFLAGS are restored to their saved values before returning.
 AC_DEFUN(PAC_ARG_WITH_LIBRSB,
 	 [SAVE_LIBS="$LIBS"
 	  SAVE_CPPFLAGS="$CPPFLAGS"
 	  AC_ARG_WITH(librsb,
 	  AC_HELP_STRING([--with-librsb], [The directory for LIBRSB, for example,
 	  --with-librsb=/opt/packages/librsb]),
 	  [pac_cv_librsb_dir=$withval],
 	  [pac_cv_librsb_dir=''])
 	  if test "x$pac_cv_librsb_dir" != "x"; then 
 	  LIBS="-L$pac_cv_librsb_dir $LIBS"
 	  RSB_INCLUDES="-I$pac_cv_librsb_dir"
 	  # CPPFLAGS="$GPU_INCLUDES $CUDA_INCLUDES $CPPFLAGS"
 	  RSB_LIBDIR="-L$pac_cv_librsb_dir"
 	  fi
 	  #AC_MSG_CHECKING([librsb dir $pac_cv_librsb_dir])
 	  AC_CHECK_HEADER([$pac_cv_librsb_dir/rsb.h],
 			  [pac_rsb_header_ok=yes],
 			  [pac_rsb_header_ok=no; RSB_INCLUDES=""])
 	  if test "x$pac_rsb_header_ok" == "xyes" ; then 
 	  RSB_LIBS="-lrsb $RSB_LIBDIR"
 	  # LIBS="$GPU_LIBS $CUDA_LIBS -lm $LIBS";
 	  # AC_MSG_CHECKING([for spgpuCreate in $GPU_LIBS])
 	  # AC_TRY_LINK_FUNC(spgpuCreate, 
 	  # 		   [pac_cv_have_spgpu=yes;pac_gpu_lib_ok=yes; ],
 	  # 		   [pac_cv_have_spgpu=no;pac_gpu_lib_ok=no; GPU_LIBS=""])
 	  # AC_MSG_RESULT($pac_gpu_lib_ok)
 	  # if test "x$pac_cv_have_spgpu" == "xyes" ; then 
 	  # AC_MSG_NOTICE([Have found SPGPU])
 	  RSBLIBNAME="librsb.a";
 	  LIBRSB_DIR="$pac_cv_librsb_dir";
 	  # SPGPU_DEFINES="-DHAVE_SPGPU";
 	  LIBRSB_INCDIR="$LIBRSB_DIR";
 	  LIBRSB_INCLUDES="-I$LIBRSB_INCDIR";
 	  LIBRSB_LIBS="-lrsb -L$LIBRSB_DIR";
 	  # CUDA_DIR="$pac_cv_cuda_dir";
 	  LIBRSB_DEFINES="-DHAVE_RSB";
 	  LRSB=-lpsb_rsb
 	  # CUDA_INCLUDES="-I$pac_cv_cuda_dir/include"
 	  # CUDA_LIBDIR="-L$pac_cv_cuda_dir/lib64 -L$pac_cv_cuda_dir/lib"
 	  FDEFINES="$LIBRSB_DEFINES $psblas_cv_define_prepend $FDEFINES";
 	  CDEFINES="$LIBRSB_DEFINES $CDEFINES";#CDEFINES="-DHAVE_SPGPU -DHAVE_CUDA $CDEFINES";
 	  fi
 #  fi
 LIBS="$SAVE_LIBS"
 CPPFLAGS="$SAVE_CPPFLAGS"
 ])
 dnl
 dnl @synopsis PAC_CHECK_CUDA_VERSION
 dnl
@ -2336,3 +2311,47 @@ CPPFLAGS="$SAVE_CPPFLAGS"
 ])dnl 
 dnl @synopsis PAC_ARG_OPENACC
 dnl
 dnl Test for --enable-openacc
 dnl 
 dnl 
 dnl
 dnl Example use:
 dnl
 dnl
 dnl @author Salvatore Filippone <salvatore.filippone@uniroma2.it>
 dnl
 dnl PAC_ARG_OPENACC records the --enable-openacc / --disable-openacc choice
 dnl of the user in pac_cv_openacc, which is always set to yes or no on exit.
 dnl This macro only records the choice; it does not probe compiler flags.
 AC_DEFUN([PAC_ARG_OPENACC],
 [AC_MSG_CHECKING([whether we want openacc ])
 AC_ARG_ENABLE(openacc,
 AS_HELP_STRING([--enable-openacc], 
 [Specify whether to enable openacc. ]),
 [
 dnl The action runs for --disable-openacc and --enable-openacc=no as well,
 dnl with enableval set to no, so the value has to be inspected.
 if test x"$enableval" != x"no" ; then
   pac_cv_openacc="yes";
 else
   pac_cv_openacc="no";
 fi
 ]
 	     )
 if test x"$pac_cv_openacc" = x"yes" ; then
   AC_MSG_RESULT([yes.])
 else
   dnl Covers both an explicit refusal and the option not being given.
   pac_cv_openacc="no";
   AC_MSG_RESULT([no.])
 fi
 ]
 )
--- a/530
+++ b/530
@ -653,6 +653,12 @@ ac_subst_vars='am__EXEEXT_FALSE
 am__EXEEXT_TRUE
 LTLIBOBJS
 LIBOBJS
 LRSB
 LIBRSB_DEFINES
 LIBRSB_DIR
 LIBRSB_INCDIR
 LIBRSB_INCLUDES
 LIBRSB_LIBS
 LCUDA
 CUDALD
 CUDAD
@ -662,11 +668,19 @@ CUDA_SHORT_VERSION
 CUDA_VERSION
 CUDA_LIBS
 CUDA_INCLUDES
 CXXCUDEFINES
 CCUDEFINES
 FCUDEFINES
 CUDA_DEFINES
 CUDA_DIR
 EXTRALDLIBS
 SPGPU_LIBS
 SPGPU_FLAGS
 CXXOPENACC
 CCOPENACC
 FCOPENACC
 OACCLD
 OACCD
 METISINCFILE
 UTILLIBNAME
 METHDLIBNAME
@ -838,8 +852,14 @@ with_amd
 with_amddir
 with_amdincdir
 with_amdlibdir
-with_cuda
+enable_cuda
 with_cudadir
 with_cudacc
 enable_openacc
 with_extraopenacc
 with_ccopenacc
 with_cxxopenacc
 with_fcopenacc
 '
      ac_precious_vars='build_alias
 host_alias
@ -1490,6 +1510,9 @@ Optional Features:
  --disable-silent-rules  verbose build output (undo: "make V=0")
  --enable-openmp         Specify whether to enable openmp.
  --disable-openmp        do not use OpenMP
  --enable-cuda           Specify whether to enable cuda.
  --enable-openacc        Specify whether to enable openacc.
  --disable-openacc       do not use Openacc
 Optional Packages:
  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
@ -1532,9 +1555,17 @@ Optional Packages:
  --with-amddir=DIR       Specify the directory for AMD library and includes.
  --with-amdincdir=DIR    Specify the directory for AMD includes.
  --with-amdlibdir=DIR    Specify the directory for AMD library.
-  --with-cuda=DIR         Specify the CUDA install directory.
+  --with-cudadir=DIR      Specify the CUDA install directory.
  --with-cudacc           A comma-separated list of CCs to compile to, for
                          example, --with-cudacc=50,60,70,75
  --with-extraopenacc     additional [EXTRAOPENACC] flags to be added: will
                          prepend to [EXTRAOPENACC]
  --with-ccopenacc        additional [CCOPENACC] flags to be added: will
                          prepend to [CCOPENACC]
  --with-cxxopenacc       additional [CXXOPENACC] flags to be added: will
                          prepend to [CXXOPENACC]
  --with-fcopenacc        additional [FCOPENACC] flags to be added: will
                          prepend to [FCOPENACC]
 Some influential environment variables:
  FC          Fortran compiler command
@ -5825,7 +5856,7 @@ fi
 # we just gave the user the chance to append values to these variables
-###############################################################################
+
 if test -n "$ac_tool_prefix"; then
@ -7392,7 +7423,7 @@ fi
 ##############################################################################
 BASEMODNAME=psb_base_mod
 PRECMODNAME=psb_prec_mod
-METHDMODNAME=psb_krylov_mod
+METHDMODNAME=psb_linsolve_mod
 UTILMODNAME=psb_util_mod
 if test "X$psblas_cv_fc" == X"cray"
@ -7402,7 +7433,7 @@ then
   FIFLAG="-I"
   BASEMODNAME=PSB_BASE_MOD
   PRECMODNAME=PSB_PREC_MOD
-   METHDMODNAME=PSB_KRYLOV_MOD
+   METHDMODNAME=PSB_LINSOLVE_MOD
   UTILMODNAME=PSB_UTIL_MOD
 else
@ -10602,12 +10633,26 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want cuda " >&5
 printf %s "checking whether we want cuda ... " >&6; }
 # Check whether --enable-cuda was given.
 if test ${enable_cuda+y}
 then :
  enableval=$enable_cuda;
 pac_cv_cuda="yes";
 fi
 if test "x$pac_cv_cuda" == "xyes"; then
-# Check whether --with-cuda was given.
+# Check whether --with-cudadir was given.
-if test ${with_cuda+y}
+if test ${with_cudadir+y}
 then :
-  withval=$with_cuda; pac_cv_cuda_dir=$withval
+  withval=$with_cudadir; pac_cv_cuda_dir=$withval
 else $as_nop
  pac_cv_cuda_dir=''
 fi
@ -10693,10 +10738,10 @@ SAVE_LIBS="$LIBS"
 SAVE_CPPFLAGS="$CPPFLAGS"
 if test "x$pac_cv_have_cuda" == "x"; then
-# Check whether --with-cuda was given.
+# Check whether --with-cudadir was given.
-if test ${with_cuda+y}
+if test ${with_cudadir+y}
 then :
-  withval=$with_cuda; pac_cv_cuda_dir=$withval
+  withval=$with_cudadir; pac_cv_cuda_dir=$withval
 else $as_nop
  pac_cv_cuda_dir=''
 fi
@ -10814,9 +10859,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 LIBS="$SAVE_LIBS"
 CPPFLAGS="$SAVE_CPPFLAGS"
      HAVE_CUDA="yes";
   CUDA_VERSION="$pac_cv_cuda_version";
   CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
-      HAVE_CUDA="yes";
+   CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
   SPGPU_LIBS="-lspgpu";
   CUDAD=cudad;
   CUDALD=cudald;
@ -10837,7 +10883,7 @@ fi
   if test "x$pac_cv_cudacc" == "x"; then
-      pac_cv_cudacc="50,60,70,75";
+      pac_cv_cudacc="50,60,70,75,80,86";
      CUDA_CC="$pac_cv_cudacc";
   fi
   if (( $pac_cv_cuda_version >= 11070  ))
@ -10849,24 +10895,432 @@ fi
 	CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
   done
   if test "x$pac_cv_cuda_version" != "xunknown"; then
-     CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+     CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
-     FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+     FCUDEFINES=" ${CUDA_DEFINES}"
-     CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+     CCUDEFINES=" ${CUDA_DEFINES}"
     CXXCUDEFINES=" ${CUDA_DEFINES}"
   fi
 fi
 # CUDA/SPGPU support needs psb_ipk_ to be 4 bytes wide.  For any other size,
 # log a notice and blank every CUDA-related setting so nothing CUDA-specific
 # is left in these variables.
 if test "x$pac_cv_ipk_size" != "x4"; then
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size,  disabling CUDA/SPGPU" >&5
 printf "%s\n" "$as_me: For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size,  disabling CUDA/SPGPU" >&6;}
  # Mark CUDA as both not requested and not available.
  pac_cv_cuda="no";
  HAVE_CUDA="no";
  # Clear compute capabilities, libraries and build sub-directories.
  CUDA_CC="";
  SPGPU_LIBS="";
  CUDAD="";
  CUDALD="";
  # Clear every preprocessor-define and include/library variable.
  CUDEFINES="";
  CUDA_DEFINES="";
  CUDA_INCLUDES="";
  CUDA_LIBS="";
  FCUDEFINES="";
  CCUDEFINES="";
  CXXCUDEFINES="";
 fi
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we want openacc " >&5
 printf %s "checking whether we want openacc ... " >&6; }
 # Check whether --enable-openacc was given.
 if test ${enable_openacc+y}
 then :
  enableval=$enable_openacc;
 pac_cv_openacc="yes";
 fi
 if test x"$pac_cv_openacc" == x"yes" ; then
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes." >&5
 printf "%s\n" "yes." >&6; }
 #   AC_LANG_PUSH([Fortran])
 #   AC_OPENACC()
 #   pac_cv_openacc_fcopt="$OPENACC_FCFLAGS";
 #   AC_LANG_POP()
 #   AC_LANG_PUSH([C])
 #   AC_OPENACC()
 #   pac_cv_openacc_ccopt="$OPENACC_CFLAGS";
 #   AC_LANG_POP()
 #   AC_LANG_PUSH([C++])
 #   AC_OPENACC()
 #   pac_cv_openacc_cxxopt="$OPENACC_CXXFLAGS";
 #   AC_LANG_POP()
 else
 pac_cv_openacc="no";
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no." >&5
 printf "%s\n" "no." >&6; }
 fi
 if test x"$pac_cv_openacc" == x"yes" ; then
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)" >&5
 printf %s "checking whether additional EXTRAOPENACC flags should be added (should be invoked only once)... " >&6; }
 # Check whether --with-extraopenacc was given.
 if test ${with_extraopenacc+y}
 then :
  withval=$with_extraopenacc;
 EXTRAOPENACC="${withval} ${EXTRAOPENACC}"
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: EXTRAOPENACC = ${EXTRAOPENACC}" >&5
 printf "%s\n" "EXTRAOPENACC = ${EXTRAOPENACC}" >&6; }
 else $as_nop
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
 printf "%s\n" "no" >&6; }
 fi
      if test -e penmp || test -e mp; then
  as_fn_error $? "AX_OPENACC clobbers files named 'mp' and 'penmp'. Aborting configure because one of these files already exists." "$LINENO" 5
 fi
 # Check whether --enable-openacc was given.
 if test ${enable_openacc+y}
 then :
  enableval=$enable_openacc;
 fi
  OPENACC_CFLAGS=
  if test "$enable_openacc" != no; then
      ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to support Openacc" >&5
 printf %s "checking for $CC option to support Openacc... " >&6; }
 if test ${ax_cv_prog_c_openacc+y}
 then :
  printf %s "(cached) " >&6
 else $as_nop
  ax_cv_prog_c_openacc='not found'
                                                                        for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_CFLAGS=$CFLAGS
        CFLAGS="$CFLAGS $ac_option"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
  int main (void) { acc_init (0); return 0;}
 _ACEOF
 if ac_fn_c_try_compile "$LINENO"
 then :
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
 int main (void) { acc_init (0); return 0;}
 _ACEOF
 if ac_fn_c_try_link "$LINENO"
 then :
  ax_cv_prog_c_openacc=$ac_option
 else $as_nop
  ax_cv_prog_c_openacc='unsupported'
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam \
    conftest$ac_exeext conftest.$ac_ext
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
        CFLAGS=$ac_save_CFLAGS
        if test "$ax_cv_prog_c_openacc" != 'not found'; then
          break
        fi
      done
      if test "$ax_cv_prog_c_openacc" = 'not found'; then
        ac_cv_prog_c_openacc='unsupported'
      elif test "$ax_cv_prog_c_openacc" = ''; then
        ac_cv_prog_c_openacc='none needed'
      fi
                        rm -f penmp mp
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_c_openacc" >&5
 printf "%s\n" "$ax_cv_prog_c_openacc" >&6; }
    if test "$ax_cv_prog_c_openacc" != 'unsupported' && \
       test "$ax_cv_prog_c_openacc" != 'none needed'; then
      OPENACC_CFLAGS="$ax_cv_prog_c_openacc"
    fi
   ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
  fi
   CCOPENACC="$ax_cv_prog_c_openacc";
   # C++ OpenACC probe, prologue.
   # Check whether --enable-openacc was given.
 if test ${enable_openacc+y}
 then :
  enableval=$enable_openacc;
 fi
  # Reset the C++ result variable.  Resetting OPENACC_CFLAGS here (as was done
  # before) threw away the value just computed by the C probe and left
  # OPENACC_CXXFLAGS uninitialised when the C++ probe fails.
  OPENACC_CXXFLAGS=
  if test "$enable_openacc" != no; then
      ac_ext=cpp
 ac_cpp='$CXXCPP $CPPFLAGS'
 ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to support Openacc" >&5
 printf %s "checking for $CXX option to support Openacc... " >&6; }
 if test ${ax_cv_prog_cxx_openacc+y}
 then :
  printf %s "(cached) " >&6
 else $as_nop
  ax_cv_prog_cxx_openacc='not found'
                                                                        for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_CXXFLAGS=$CXXFLAGS
        CXXFLAGS="$CXXFLAGS $ac_option"
        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
 int main (void) { acc_init (acc_get_device_type()); return 0;}
 _ACEOF
 if ac_fn_cxx_try_compile "$LINENO"
 then :
  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h.  */
 #ifndef _OPENACC
 #error "OpenACC not supported"
 #endif
 #include <openacc.h>
 int main (void) { acc_init (acc_get_device_type()); return 0;}
 _ACEOF
 if ac_fn_cxx_try_link "$LINENO"
 then :
  ax_cv_prog_cxx_openacc=$ac_option
 else $as_nop
  ax_cv_prog_cxx_openacc='unsupported'
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam \
    conftest$ac_exeext conftest.$ac_ext
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
        CXXFLAGS=$ac_save_CXXFLAGS
        if test "$ax_cv_prog_cxx_openacc" != 'not found'; then
          break
        fi
      done
      if test "$ax_cv_prog_cxx_openacc" = 'not found'; then
        ac_cv_prog_cxx_openacc='unsupported'
      elif test "$ax_cv_prog_cxx_openacc" = ''; then
        ac_cv_prog_cxx_openacc='none needed'
      fi
                        rm -f penmp mp
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_cxx_openacc" >&5
 printf "%s\n" "$ax_cv_prog_cxx_openacc" >&6; }
    if test "$ax_cv_prog_cxx_openacc" != 'unsupported' && \
       test "$ax_cv_prog_cxx_openacc" != 'none needed'; then
      OPENACC_CXXFLAGS="$ax_cv_prog_cxx_openacc"
    fi
   ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
  fi
   CXXOPENACC="$ax_cv_prog_cxx_openacc";
   # Fortran OpenACC probe: look for the $FC option (possibly none) with which
   # a program that uses the openacc module both compiles and links.  The
   # outcome is stored in ax_cv_prog_fc_openacc and copied to FCOPENACC.
   # Check whether --enable-openacc was given.
 if test ${enable_openacc+y}
 then :
  enableval=$enable_openacc;
 fi
  # Reset the Fortran result variable (not OPENACC_CFLAGS, which belongs to
  # the C probe and must survive this one).
  OPENACC_FCFLAGS=
  if test "$enable_openacc" != no; then
      ac_ext=${ac_fc_srcext-f}
 ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5'
 ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_fc_compiler_gnu
      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $FC option to support Openacc" >&5
 printf %s "checking for $FC option to support Openacc... " >&6; }
 if test ${ax_cv_prog_fc_openacc+y}
 then :
  printf %s "(cached) " >&6
 else $as_nop
  ax_cv_prog_fc_openacc='not found'
                                                                        for ac_option in '' -fopenacc -openacc -acc; do
        ac_save_FCFLAGS=$FCFLAGS
        FCFLAGS="$FCFLAGS $ac_option"
        cat > conftest.$ac_ext <<_ACEOF
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
 _ACEOF
 if ac_fn_fc_try_compile "$LINENO"
 then :
  cat > conftest.$ac_ext <<_ACEOF
      program main
      use openacc
      implicit none
      integer tid, np
      tid = 42
      call acc_init(0)
      end
 _ACEOF
 if ac_fn_fc_try_link "$LINENO"
 then :
  ax_cv_prog_fc_openacc=$ac_option
 else $as_nop
  ax_cv_prog_fc_openacc='unsupported'
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam \
    conftest$ac_exeext conftest.$ac_ext
 fi
 rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
        FCFLAGS=$ac_save_FCFLAGS
        # Stop only once an option has both compiled and linked.  'not found'
        # (nothing has compiled yet) and 'unsupported' (a link step failed)
        # both mean "try the next candidate".  Testing only against
        # 'unsupported' ended the search as soon as the first candidate
        # failed to compile.
        if test "$ax_cv_prog_fc_openacc" != 'not found' && \
           test "$ax_cv_prog_fc_openacc" != 'unsupported'; then
          break
        fi
      done
      # NOTE(review): these two branches assign ac_cv_prog_fc_openacc while
      # everything else reads ax_cv_prog_fc_openacc; looks like a leftover
      # from the OpenMP macro.  Left untouched because FCOPENACC below
      # consumes the raw ax_cv_ value -- confirm before renaming.
      if test "$ax_cv_prog_fc_openacc" = 'not found'; then
        ac_cv_prog_fc_openacc='unsupported'
      elif test "$ax_cv_prog_fc_openacc" = ''; then
        ac_cv_prog_fc_openacc='none needed'
      fi
                        rm -f penmp mp
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ax_cv_prog_fc_openacc" >&5
 printf "%s\n" "$ax_cv_prog_fc_openacc" >&6; }
    if test "$ax_cv_prog_fc_openacc" != 'unsupported' && \
       test "$ax_cv_prog_fc_openacc" != 'none needed'; then
      OPENACC_FCFLAGS="$ax_cv_prog_fc_openacc"
    fi
   # Switch the current test language back to C.
   ac_ext=c
 ac_cpp='$CPP $CPPFLAGS'
 ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
 ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5'
 ac_compiler_gnu=$ac_cv_c_compiler_gnu
  fi
   FCOPENACC="$ax_cv_prog_fc_openacc";
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CCOPENACC flags should be added (should be invoked only once)" >&5
 printf %s "checking whether additional CCOPENACC flags should be added (should be invoked only once)... " >&6; }
 # Check whether --with-ccopenacc was given.
 if test ${with_ccopenacc+y}
 then :
  withval=$with_ccopenacc;
 CCOPENACC="${withval} ${CCOPENACC}"
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CCOPENACC = ${CCOPENACC}" >&5
 printf "%s\n" "CCOPENACC = ${CCOPENACC}" >&6; }
 else $as_nop
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
 printf "%s\n" "no" >&6; }
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional CXXOPENACC flags should be added (should be invoked only once)" >&5
 printf %s "checking whether additional CXXOPENACC flags should be added (should be invoked only once)... " >&6; }
 # Check whether --with-cxxopenacc was given.
 if test ${with_cxxopenacc+y}
 then :
  withval=$with_cxxopenacc;
 CXXOPENACC="${withval} ${CXXOPENACC}"
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: CXXOPENACC = ${CXXOPENACC}" >&5
 printf "%s\n" "CXXOPENACC = ${CXXOPENACC}" >&6; }
 else $as_nop
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
 printf "%s\n" "no" >&6; }
 fi
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether additional FCOPENACC flags should be added (should be invoked only once)" >&5
 printf %s "checking whether additional FCOPENACC flags should be added (should be invoked only once)... " >&6; }
 # Check whether --with-fcopenacc was given.
 if test ${with_fcopenacc+y}
 then :
  withval=$with_fcopenacc;
 FCOPENACC="${withval} ${FCOPENACC}"
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: FCOPENACC = ${FCOPENACC}" >&5
 printf "%s\n" "FCOPENACC = ${FCOPENACC}" >&6; }
 else $as_nop
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
 printf "%s\n" "no" >&6; }
 fi
   CCOPENACC="$CCOPENACC   $EXTRAOPENACC";
   CXXOPENACC="$CXXOPENACC   $EXTRAOPENACC";
   FCOPENACC="$FCOPENACC   $EXTRAOPENACC";
   OACCD=oaccd;
   OACCLD=oaccld;
   #FCOPT="$FCOPT    $FCOPENACC";
   #CCOPT="$CCOPT    $CCOPENACC"
   #CXXOPT="$CXXOPT  $CXXOPENACC"
   #FLINK="$FLINK $FCOPENACC";
 fi
 ###############################################################################
 LIBRSB_DIR="$pac_cv_librsb_dir";
 { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBRSB install dir" >&5
 printf %s "checking for LIBRSB install dir... " >&6; }
 case $LIBRSB_DIR in
     /*) ;;
     *)  esac
 pac_cv_status_file="$LIBRSB_DIR/librsb.a"
 if test ! -f "$pac_cv_status_file" ; then
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
 printf "%s\n" "no" >&6; }
  #AC_MSG_ERROR([Could not find an installation  in $LIBRSB_DIR.])
 else
  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIBRSB_DIR" >&5
 printf "%s\n" "$LIBRSB_DIR" >&6; }
  RSBTARGETLIB=rsbd;
  RSBTARGETOBJ=rsbobj;
 fi
@ -10878,7 +11332,7 @@ fi
 LIBDIR=lib
 BASELIBNAME=libpsb_base.a
 PRECLIBNAME=libpsb_prec.a
-METHDLIBNAME=libpsb_krylov.a
+METHDLIBNAME=libpsb_linsolve.a
 UTILLIBNAME=libpsb_util.a
 ###############################################################################
@ -10928,9 +11382,9 @@ UTILLIBNAME=libpsb_util.a
 PSBLASRULES='
 PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
-CXXDEFINES=$(PSBCXXDEFINES)  $(CUDA_DEFINES)
+CXXDEFINES=$(PSBCXXDEFINES)
-CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
+CDEFINES=$(PSBCDEFINES)
-FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+FDEFINES=$(PSBFDEFINES)
 # These should be portable rules, arent they?
@ -10961,6 +11415,22 @@ FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
@ -12299,6 +12769,11 @@ fi
 	CUDA 		      : ${HAVE_CUDA}
 	CUDA_CC               : ${pac_cv_cudacc}
 	OPENACC		      : ${pac_cv_openacc}
        FCOPENACC             : ${FCOPENACC}
 	OACCD                 : ${OACCD}
 	OACCLD                : ${OACCLD}
 	BLAS                  : ${BLAS_LIBS}
 	METIS usable          : ${psblas_cv_have_metis}
@ -12332,6 +12807,11 @@ printf "%s\n" "$as_me:
 	CUDA 		      : ${HAVE_CUDA}
 	CUDA_CC               : ${pac_cv_cudacc}
 	OPENACC		      : ${pac_cv_openacc}
        FCOPENACC             : ${FCOPENACC}
 	OACCD                 : ${OACCD}
 	OACCLD                : ${OACCLD}
 	BLAS                  : ${BLAS_LIBS}
 	METIS usable          : ${psblas_cv_have_metis}
@ -12348,6 +12828,16 @@ printf "%s\n" "$as_me:
 	If you are satisfied, run 'make' to build ${PACKAGE_NAME} and its documentation; otherwise
 	type  ./configure --help=short for a complete list of configure options specific to ${PACKAGE_NAME}.
 " >&6;}
 # Final sanity check: warn when OpenACC was requested but no Fortran OpenACC
 # flag ended up in FCOPENACC.
 if test x"${pac_cv_openacc}" == x"yes" ; then
   # FCOPENACC is assembled as "$FCOPENACC   $EXTRAOPENACC", so it contains
   # blanks even when no flag was chosen and a plain comparison with the empty
   # string never matches.  Let word splitting drop the blanks before testing.
   if test -z "$(printf '%s' ${FCOPENACC})" ; then
   { printf "%s\n" "$as_me:${as_lineno-$LINENO}:
   WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
   You may want to rerun configure with --with-fcopenacc= " >&5
 printf "%s\n" "$as_me:
   WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
   You may want to rerun configure with --with-fcopenacc= " >&6;}
  fi
 fi
 ###############################################################################
--- a/configure.ac
+++ b/configure.ac
@ -483,7 +483,7 @@ fi
 ##############################################################################
 BASEMODNAME=psb_base_mod
 PRECMODNAME=psb_prec_mod
-METHDMODNAME=psb_krylov_mod
+METHDMODNAME=psb_linsolve_mod
 UTILMODNAME=psb_util_mod
 if test "X$psblas_cv_fc" == X"cray" 
@ -493,7 +493,7 @@ then
   FIFLAG="-I"
   BASEMODNAME=PSB_BASE_MOD
   PRECMODNAME=PSB_PREC_MOD
-   METHDMODNAME=PSB_KRYLOV_MOD
+   METHDMODNAME=PSB_LINSOLVE_MOD
   UTILMODNAME=PSB_UTIL_MOD
 else 
@ -795,16 +795,19 @@ if test "x$psblas_cv_have_amd" == "xyes" ; then
 fi
 PAC_ARG_CUDA()
 if test "x$pac_cv_cuda" == "xyes"; then
 PAC_CHECK_CUDA()
 if test "x$pac_cv_have_cuda" == "xyes"; then
   PAC_CHECK_CUDA_VERSION()
   CUDA_VERSION="$pac_cv_cuda_version";
   CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
   dnl PAC_CHECK_SPGPU()
   HAVE_CUDA="yes";
   CUDA_VERSION="$pac_cv_cuda_version";
   CUDA_SHORT_VERSION=$(expr $pac_cv_cuda_version / 1000);
   CUDA_DEFINES="-DHAVE_CUDA -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}";
   SPGPU_LIBS="-lspgpu";	
   CUDAD=cudad;
   CUDALD=cudald;
@ -814,7 +817,7 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
   PAC_ARG_WITH_CUDACC()
   if test "x$pac_cv_cudacc" == "x"; then
-      pac_cv_cudacc="50,60,70,75";
+      pac_cv_cudacc="50,60,70,75,80,86";
      CUDA_CC="$pac_cv_cudacc";
   fi
   if (( $pac_cv_cuda_version >= 11070  ))
@ -826,27 +829,72 @@ if test "x$pac_cv_have_cuda" == "xyes"; then
 	CUDEFINES="$CUDEFINES -gencode arch=compute_$cc,code=sm_$cc";
   done
   if test "x$pac_cv_cuda_version" != "xunknown"; then
-     CUDEFINES="$CUDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+     CUDEFINES="$CUDEFINES ${CUDA_DEFINES}"
-     FDEFINES="$FDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"
+     FCUDEFINES=" ${CUDA_DEFINES}"
-     CDEFINES="$CDEFINES -DCUDA_SHORT_VERSION=${CUDA_SHORT_VERSION} -DCUDA_VERSION=${CUDA_VERSION}"    
+     CCUDEFINES=" ${CUDA_DEFINES}"
     CXXCUDEFINES=" ${CUDA_DEFINES}"	
   fi
 fi
 if test "x$pac_cv_ipk_size" != "x4"; then
  AC_MSG_NOTICE([For CUDA I need psb_ipk_ to be 4 bytes but it is $pac_cv_ipk_size,  disabling CUDA/SPGPU])
  pac_cv_cuda="no";
  HAVE_CUDA="no";
  CUDA_CC="";	
  SPGPU_LIBS="";	
  CUDAD="";
  CUDALD="";
  CUDEFINES="";
  CUDA_DEFINES="";
  CUDA_INCLUDES="";
  CUDA_LIBS="";
  FCUDEFINES="";
  CCUDEFINES="";
  CXXCUDEFINES="";
 fi
 fi
 PAC_ARG_OPENACC()
 dnl AC_ARG_ENABLE([openacc],
 dnl    [AS_HELP_STRING([--disable-openacc], [do not use Openacc])])
 if test x"$pac_cv_openacc" == x"yes" ; then
   PAC_ARG_WITH_FLAGS(extraopenacc,EXTRAOPENACC)
   dnl if test false; then
   AX_C_OPENACC()
   CCOPENACC="$ax_cv_prog_c_openacc";
   AX_CXX_OPENACC()
   CXXOPENACC="$ax_cv_prog_cxx_openacc";
   AX_FC_OPENACC()
   FCOPENACC="$ax_cv_prog_fc_openacc";
    dnl AX_OPENACC()
    dnl 
    dnl CXXOPENACC="$ax_cv_prog_cxx_openacc";	
    dnl FCOPENACC="$ax_cv_prog_fc_openacc";   
   dnl else   
 dnl   AC_MSG_NOTICE([OpenACC 1 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])	   
   PAC_ARG_WITH_FLAGS(ccopenacc,CCOPENACC)	
   PAC_ARG_WITH_FLAGS(cxxopenacc,CXXOPENACC)
   PAC_ARG_WITH_FLAGS(fcopenacc,FCOPENACC)
 dnl   AC_MSG_NOTICE([OpenACC 2 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])		
 dnl   CCOPENACC="$ax_cv_prog_c_openacc";
 dnl   CXXOPENACC="$ax_cv_prog_cxx_openacc";	
 dnl   FCOPENACC="$ax_cv_prog_fc_openacc";   
 dnl   fi
   CCOPENACC="$CCOPENACC   $EXTRAOPENACC";
   CXXOPENACC="$CXXOPENACC   $EXTRAOPENACC";
   FCOPENACC="$FCOPENACC   $EXTRAOPENACC";
 dnl   AC_MSG_NOTICE([OpenACC 3 flags CC $CCOPENACC CXX $CXXOPENACC FC $FCOPENACC])	   
   OACCD=oaccd;
   OACCLD=oaccld;
   #FCOPT="$FCOPT    $FCOPENACC";
   #CCOPT="$CCOPT    $CCOPENACC"	
   #CXXOPT="$CXXOPT  $CXXOPENACC"		
   #FLINK="$FLINK $FCOPENACC";
 fi
 ###############################################################################
-PAC_ARG_WITH_LIBRSB
+dnl PAC_ARG_WITH_LIBRSB()
 LIBRSB_DIR="$pac_cv_librsb_dir";
 AC_MSG_CHECKING([for LIBRSB install dir])
 case $LIBRSB_DIR in 
@ -878,7 +926,7 @@ fi
 LIBDIR=lib
 BASELIBNAME=libpsb_base.a
 PRECLIBNAME=libpsb_prec.a
-METHDLIBNAME=libpsb_krylov.a
+METHDLIBNAME=libpsb_linsolve.a
 UTILLIBNAME=libpsb_util.a
 ###############################################################################
@ -928,9 +976,9 @@ AC_SUBST(FINCLUDES)
 PSBLASRULES='
 PSBLDLIBS=$(LAPACK) $(BLAS) $(METIS_LIB) $(AMD_LIB) $(LIBS)
-CXXDEFINES=$(PSBCXXDEFINES)  $(CUDA_DEFINES)
+CXXDEFINES=$(PSBCXXDEFINES) 
-CDEFINES=$(PSBCDEFINES) $(CUDA_DEFINES)
+CDEFINES=$(PSBCDEFINES) 
-FDEFINES=$(PSBFDEFINES) $(CUDA_DEFINES)
+FDEFINES=$(PSBFDEFINES) 
 # These should be portable rules, arent they?
@ -952,6 +1000,12 @@ AC_SUBST(PRECLIBNAME)
 AC_SUBST(METHDLIBNAME)
 AC_SUBST(UTILLIBNAME)
 AC_SUBST(METISINCFILE)
 AC_SUBST(OACCD)
 AC_SUBST(OACCLD)
 AC_SUBST(FCOPENACC)
 AC_SUBST(CCOPENACC)
 AC_SUBST(CXXOPENACC)
 AC_SUBST(SPGPU_FLAGS)
 AC_SUBST(SPGPU_LIBS)
 dnl AC_SUBST(SPGPU_DIR)
@ -960,6 +1014,9 @@ dnl AC_SUBST(SPGPU_INCDIR)
 AC_SUBST(EXTRALDLIBS)
 AC_SUBST(CUDA_DIR)
 AC_SUBST(CUDA_DEFINES)
 AC_SUBST(FCUDEFINES)
 AC_SUBST(CCUDEFINES)
 AC_SUBST(CXXCUDEFINES)	
 AC_SUBST(CUDA_INCLUDES)
 AC_SUBST(CUDA_LIBS)
 AC_SUBST(CUDA_VERSION)
@ -976,7 +1033,6 @@ AC_SUBST(LIBRSB_DIR)
 AC_SUBST(LIBRSB_DEFINES)
 AC_SUBST(LRSB)
 ###############################################################################
 # the following files will be created by Automake
@ -1004,6 +1060,11 @@ AC_MSG_NOTICE([
 	CUDA 		      : ${HAVE_CUDA}
 	CUDA_CC               : ${pac_cv_cudacc}
 	OPENACC		      : ${pac_cv_openacc}
        FCOPENACC             : ${FCOPENACC}
 	OACCD                 : ${OACCD}
 	OACCLD                : ${OACCLD}
 	BLAS                  : ${BLAS_LIBS}
 	METIS usable          : ${psblas_cv_have_metis}
@ -1023,6 +1084,13 @@ dnl Note : we should use LDLIBS sooner or later!
 dnl	To install the program and its documentation, run 'make install' if you are root,
 dnl	or run 'su -c "make install"' if you are not root.
 ])
 dnl Final sanity check: warn when OpenACC was requested but FCOPENACC holds
 dnl no Fortran OpenACC flag.
 if test x"${pac_cv_openacc}" == x"yes" ; then
   dnl FCOPENACC is built as "$FCOPENACC   $EXTRAOPENACC" and therefore contains
   dnl blanks even when no flag was chosen, so comparing it with the empty
   dnl string never matches.  Let word splitting discard the blanks first.
   if test -z "$(printf '%s' ${FCOPENACC})" ; then
   AC_MSG_NOTICE([
   WARNING: OpenACC enabled, but no choice for FCOPENACC compile flag.
   You may want to rerun configure with --with-fcopenacc= ])
  fi
 fi
 ###############################################################################
--- a/cuda/Makefile
+++ b/cuda/Makefile
@ -141,3 +141,12 @@ spgpuclean:
 	$(MAKE) -C spgpu clean
 veryclean:  clean
 # Suffix rules local to this directory.  Each one builds an object file from
 # the source with the same stem and adds the language-specific CUDA define
 # variable (CCUDEFINES / FCUDEFINES / CXXCUDEFINES) to the compile line.
 #
 # C sources.
 .c.o:
 	$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
 # Fortran sources with the lowercase suffix; $(FDEFINES) is not passed
 # (presumably because these files are not preprocessed -- confirm).
 .f90.o:
 	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
 # Fortran sources with the uppercase suffix; $(FDEFINES) is passed as well.
 .F90.o:
 	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
 # C++ sources.
 .cpp.o:
 	$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
--- a/cuda/cvectordev.c
+++ b/cuda/cvectordev.c
@ -184,7 +184,7 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n,
 }
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -194,7 +194,7 @@ int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
  return(i);
 }
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -204,7 +204,7 @@ int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiV
  return(i);
 }
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
--- a/cuda/cvectordev.h
+++ b/cuda/cvectordev.h
@ -37,6 +37,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
 #include "vector.h"
 int registerMappedFloatComplex(void *, void **, int, cuFloatComplex);
 int writeMultiVecDeviceFloatComplex(void* deviceMultiVec, cuFloatComplex* hostMultiVec);
@ -63,9 +64,9 @@ int iscatMultiVecDeviceFloatComplex(void* deviceVec, int vectorId, int n, int fi
 			      int hfirst, void* host_values, int indexBase, cuFloatComplex beta);
 int scalMultiVecDeviceFloatComplex(cuFloatComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
-int asumMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA);
+int asumMultiVecDeviceFloatComplex(float* y_res, int n, void* devVecA);
 int dotMultiVecDeviceFloatComplex(cuFloatComplex* y_res, int n, void* devVecA, void* devVecB);
 int axpbyMultiVecDeviceFloatComplex(int n, cuFloatComplex alpha, void* devVecX, cuFloatComplex beta, void* devVecY);
--- a/cuda/dnsdev.c
+++ b/cuda/dnsdev.c
@ -178,12 +178,12 @@ int spmvDnsDeviceFloatComplex(char transa, int m, int n, int k, float complex *a
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
    status = cublasCgemv(handle, trans, m,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,1,
+			 (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
-			 beta,  y->v_,1);
+			 (const cuComplex *) beta,  y->v_,1);
  } else {
    status = cublasCgemm(handle, trans, CUBLAS_OP_N, m,n,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+			 (const cuComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
-			 beta,  y->v_,y->pitch_);
+			 (const cuComplex *) beta,  y->v_,y->pitch_);
  }    
  if (status == CUBLAS_STATUS_SUCCESS)  
@ -205,12 +205,12 @@ int spmvDnsDeviceDoubleComplex(char transa, int m, int n, int k, double complex
  /* Note: the M,N,K choices according to TRANS have already been handled in the caller */  
  if (n == 1) {
    status = cublasZgemv(handle, trans, m,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,1,
+			 (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,1,
-			 beta,  y->v_,1);
+			 (const cuDoubleComplex *) beta,  y->v_,1);
  } else {
    status = cublasZgemm(handle, trans, CUBLAS_OP_N, m,n,k,
-			 alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
+			 (const cuDoubleComplex *) alpha, devMat->cM,devMat->pitch, x->v_,x->pitch_,
-			 beta,  y->v_,y->pitch_);
+			 (const cuDoubleComplex *) beta,  y->v_,y->pitch_);
  }    
  if (status == CUBLAS_STATUS_SUCCESS)  
--- a/cuda/dvectordev.c
+++ b/cuda/dvectordev.c
@ -220,7 +220,8 @@ int dotMultiVecDeviceDouble(double* y_res, int n, void* devMultiVecA, void* devM
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
  spgpuHandle_t handle=psb_cudaGetHandle();
-  spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+  spgpuDmdot(handle, y_res, n, (double*)devVecA->v_, (double*)devVecB->v_,
 	     devVecA->count_,devVecB->pitch_);
  return(i);
 }
--- a/cuda/dvectordev.h
+++ b/cuda/dvectordev.h
@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
 #include "vector.h"
 int registerMappedDouble(void *, void **, int, double);
 int writeMultiVecDeviceDouble(void* deviceMultiVec, double* hostMultiVec);
--- a/cuda/fcusparse_fct.h
+++ b/cuda/fcusparse_fct.h
@ -439,7 +439,7 @@ int T_CSRGDeviceSetMatFillMode(T_Cmat *Matrix, int type)
  T_CSRGDeviceMat *cMat= Matrix->mat;
  cusparseFillMode_t  mode=type;
-  CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+  CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
 					   CUSPARSE_SPMAT_FILL_MODE,
 					   (const void*) &mode,
 					   sizeof(cusparseFillMode_t)));
@ -450,7 +450,7 @@ int T_CSRGDeviceSetMatDiagType(T_Cmat *Matrix, int type)
 {
  T_CSRGDeviceMat *cMat= Matrix->mat;
  cusparseDiagType_t  cutype=type;
-  CHECK_CUSPARSE(cusparseSpMatSetAttribute(cMat->spmvDescr,
+  CHECK_CUSPARSE(cusparseSpMatSetAttribute((*(cMat->spmvDescr)),
 					   CUSPARSE_SPMAT_DIAG_TYPE,
 					   (const void*) &cutype,
 					   sizeof(cusparseDiagType_t)));
--- a/cuda/impl/Makefile
+++ b/cuda/impl/Makefile
@ -295,3 +295,12 @@ lib:  objs
 clean:
 	/bin/rm -f $(OBJS)
 # Suffix rules local to this directory.  Each one builds an object file from
 # the source with the same stem and adds the language-specific CUDA define
 # variable (CCUDEFINES / FCUDEFINES / CXXCUDEFINES) to the compile line.
 #
 # C sources.
 .c.o:
 	$(CC) $(CCOPT) $(CCUDEFINES) $(CINCLUDES) $(CDEFINES) -c $< -o $@
 # Fortran sources with the lowercase suffix; $(FDEFINES) is not passed
 # (presumably because these files are not preprocessed -- confirm).
 .f90.o:
 	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) -c $< -o $@
 # Fortran sources with the uppercase suffix; $(FDEFINES) is passed as well.
 .F90.o:
 	$(FC) $(FCOPT) $(FCUDEFINES) $(FINCLUDES) $(FDEFINES) -c $< -o $@
 # C++ sources.
 .cpp.o:
 	$(CXX) $(CXXOPT) $(CXXCUDEFINES) $(CXXINCLUDES) $(CXXDEFINES) -c $< -o $@
--- a/cuda/ivectordev.h
+++ b/cuda/ivectordev.h
@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
 #include "vector.h"
 int registerMappedInt(void *, void **, int, int);
 int writeMultiVecDeviceInt(void* deviceMultiVec, int* hostMultiVec);
--- a/cuda/psb_c_cuda_vect_mod.F90
+++ b/cuda/psb_c_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
  end subroutine c_cuda_set_scal
 !!$
 !!$  subroutine c_cuda_set_vect(x,val)
 !!$    class(psb_c_vect_cuda), intent(inout) :: x
 !!$    complex(psb_spk_), intent(in)           :: val(:)
 !!$    integer(psb_ipk_) :: nr
 !!$    integer(psb_ipk_) :: info
 !!$
 !!$    if (x%is_dev()) call x%sync()
 !!$    call x%psb_c_base_vect_type%set_vect(val)
 !!$    call x%set_host()
 !!$
 !!$  end subroutine c_cuda_set_vect
@ -834,7 +822,6 @@ contains
    class(psb_c_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    complex(psb_spk_)                :: res
    complex(psb_spk_), external      :: ddot
    integer(psb_ipk_) :: info
    res = czero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_c_vect
    !
    select type(yy => y)
    type is (psb_c_base_vect_type)
      if (x%is_dev()) call x%sync()
      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_c_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select
@ -870,10 +854,10 @@ contains
    complex(psb_spk_), intent(in)           :: y(:)
    integer(psb_ipk_), intent(in)        :: n
    complex(psb_spk_)                :: res
-    complex(psb_spk_), external      :: ddot
+    complex(psb_spk_), external      :: cdot
    if (x%is_dev()) call x%sync()
-    res = ddot(n,y,1,x%v,1)
+    res = cdot(n,y,1,x%v,1)
  end function c_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_c_cuda_multivect_mod
  end type psb_c_multivect_cuda
  public  :: psb_c_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_c_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface
 contains
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    complex(psb_spk_)       :: x(:,:)
    type(psb_c_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)
-  end function constructor
+  end function mconstructor
 !!$  subroutine c_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_d_cuda_vect_mod.F90
+++ b/cuda/psb_d_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
  end subroutine d_cuda_set_scal
 !!$
 !!$  subroutine d_cuda_set_vect(x,val)
 !!$    class(psb_d_vect_cuda), intent(inout) :: x
 !!$    real(psb_dpk_), intent(in)           :: val(:)
 !!$    integer(psb_ipk_) :: nr
 !!$    integer(psb_ipk_) :: info
 !!$
 !!$    if (x%is_dev()) call x%sync()
 !!$    call x%psb_d_base_vect_type%set_vect(val)
 !!$    call x%set_host()
 !!$
 !!$  end subroutine d_cuda_set_vect
@ -834,7 +822,6 @@ contains
    class(psb_d_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    real(psb_dpk_)                :: res
    real(psb_dpk_), external      :: ddot
    integer(psb_ipk_) :: info
    res = dzero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_d_vect
    !
    select type(yy => y)
    type is (psb_d_base_vect_type)
      if (x%is_dev()) call x%sync()
      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_d_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select
@ -1393,14 +1377,14 @@ module psb_d_cuda_multivect_mod
  end type psb_d_multivect_cuda
  public  :: psb_d_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_d_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface
 contains
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    real(psb_dpk_)       :: x(:,:)
    type(psb_d_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)
-  end function constructor
+  end function mconstructor
 !!$  subroutine d_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_i_cuda_vect_mod.F90
+++ b/cuda/psb_i_cuda_vect_mod.F90
@ -795,18 +795,6 @@ contains
    call x%set_dev()
  end subroutine i_cuda_set_scal
 !!$
 !!$  subroutine i_cuda_set_vect(x,val)
 !!$    class(psb_i_vect_cuda), intent(inout) :: x
 !!$    integer(psb_ipk_), intent(in)           :: val(:)
 !!$    integer(psb_ipk_) :: nr
 !!$    integer(psb_ipk_) :: info
 !!$
 !!$    if (x%is_dev()) call x%sync()
 !!$    call x%psb_i_base_vect_type%set_vect(val)
 !!$    call x%set_host()
 !!$
 !!$  end subroutine i_cuda_set_vect
@ -949,14 +937,14 @@ module psb_i_cuda_multivect_mod
  end type psb_i_multivect_cuda
  public  :: psb_i_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_i_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface
 contains
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    integer(psb_ipk_)       :: x(:,:)
    type(psb_i_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -964,7 +952,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)
-  end function constructor
+  end function mconstructor
 !!$  subroutine i_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_s_cuda_vect_mod.F90
+++ b/cuda/psb_s_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
  end subroutine s_cuda_set_scal
 !!$
 !!$  subroutine s_cuda_set_vect(x,val)
 !!$    class(psb_s_vect_cuda), intent(inout) :: x
 !!$    real(psb_spk_), intent(in)           :: val(:)
 !!$    integer(psb_ipk_) :: nr
 !!$    integer(psb_ipk_) :: info
 !!$
 !!$    if (x%is_dev()) call x%sync()
 !!$    call x%psb_s_base_vect_type%set_vect(val)
 !!$    call x%set_host()
 !!$
 !!$  end subroutine s_cuda_set_vect
@ -834,7 +822,6 @@ contains
    class(psb_s_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    real(psb_spk_)                :: res
    real(psb_spk_), external      :: ddot
    integer(psb_ipk_) :: info
    res = szero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_s_vect
    !
    select type(yy => y)
    type is (psb_s_base_vect_type)
      if (x%is_dev()) call x%sync()
      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_s_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select
@ -870,10 +854,10 @@ contains
    real(psb_spk_), intent(in)           :: y(:)
    integer(psb_ipk_), intent(in)        :: n
    real(psb_spk_)                :: res
-    real(psb_spk_), external      :: ddot
+    real(psb_spk_), external      :: sdot
    if (x%is_dev()) call x%sync()
-    res = ddot(n,y,1,x%v,1)
+    res = sdot(n,y,1,x%v,1)
  end function s_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_s_cuda_multivect_mod
  end type psb_s_multivect_cuda
  public  :: psb_s_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_s_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface
 contains
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    real(psb_spk_)       :: x(:,:)
    type(psb_s_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)
-  end function constructor
+  end function mconstructor
 !!$  subroutine s_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/psb_z_cuda_vect_mod.F90
+++ b/cuda/psb_z_cuda_vect_mod.F90
@ -813,18 +813,6 @@ contains
    call x%set_dev()
  end subroutine z_cuda_set_scal
 !!$
 !!$  subroutine z_cuda_set_vect(x,val)
 !!$    class(psb_z_vect_cuda), intent(inout) :: x
 !!$    complex(psb_dpk_), intent(in)           :: val(:)
 !!$    integer(psb_ipk_) :: nr
 !!$    integer(psb_ipk_) :: info
 !!$
 !!$    if (x%is_dev()) call x%sync()
 !!$    call x%psb_z_base_vect_type%set_vect(val)
 !!$    call x%set_host()
 !!$
 !!$  end subroutine z_cuda_set_vect
@ -834,7 +822,6 @@ contains
    class(psb_z_base_vect_type), intent(inout) :: y
    integer(psb_ipk_), intent(in)              :: n
    complex(psb_dpk_)                :: res
    complex(psb_dpk_), external      :: ddot
    integer(psb_ipk_) :: info
    res = zzero
@ -844,9 +831,6 @@ contains
    !  TYPE psb_z_vect
    !
    select type(yy => y)
    type is (psb_z_base_vect_type)
      if (x%is_dev()) call x%sync()
      res = ddot(n,x%v,1,yy%v,1)
    type is (psb_z_vect_cuda)
      if (x%is_host()) call x%sync()
      if (yy%is_host()) call yy%sync()
@ -858,7 +842,7 @@ contains
    class default
      ! y%sync is done in dot_a
-      call x%sync()      
+      if (x%is_dev()) call x%sync()
      res = y%dot(n,x%v)
    end select
@ -870,10 +854,10 @@ contains
    complex(psb_dpk_), intent(in)           :: y(:)
    integer(psb_ipk_), intent(in)        :: n
    complex(psb_dpk_)                :: res
-    complex(psb_dpk_), external      :: ddot
+    complex(psb_dpk_), external      :: zdot
    if (x%is_dev()) call x%sync()
-    res = ddot(n,y,1,x%v,1)
+    res = zdot(n,y,1,x%v,1)
  end function z_cuda_dot_a
@ -1393,14 +1377,14 @@ module psb_z_cuda_multivect_mod
  end type psb_z_multivect_cuda
  public  :: psb_z_multivect_cuda
-  private :: constructor
+  private :: mconstructor
  interface psb_z_multivect_cuda
-    module procedure constructor
+    module procedure mconstructor
  end interface
 contains
-  function constructor(x) result(this)
+  function mconstructor(x) result(this)
    complex(psb_dpk_)       :: x(:,:)
    type(psb_z_multivect_cuda) :: this
    integer(psb_ipk_) :: info
@ -1408,7 +1392,7 @@ contains
    this%v = x
    call this%asb(size(x,1),size(x,2),info)
-  end function constructor
+  end function mconstructor
 !!$  subroutine z_cuda_multi_gthzv_x(i,n,idx,x,y)
--- a/cuda/spgpu/kernels/sdot.cu
+++ b/cuda/spgpu/kernels/sdot.cu
@ -96,7 +96,6 @@ __global__ void spgpuSdot_kern(int n, float* x, float* y)
 	{
 #endif	
 #ifdef ASSUME_LOCK_SYNC_PARALLELISM
 		volatile float* vsSum = sSum;
 		vsSum[threadIdx.x] = res;
--- a/cuda/svectordev.c
+++ b/cuda/svectordev.c
@ -220,7 +220,8 @@ int dotMultiVecDeviceFloat(float* y_res, int n, void* devMultiVecA, void* devMul
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
  spgpuHandle_t handle=psb_cudaGetHandle();
-  spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,devVecA->count_,devVecB->pitch_);
+  spgpuSmdot(handle, y_res, n, (float*)devVecA->v_, (float*)devVecB->v_,
 	     devVecA->count_,devVecB->pitch_);
  return(i);
 }
--- a/cuda/svectordev.h
+++ b/cuda/svectordev.h
@ -35,6 +35,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
 #include "vector.h"
 int registerMappedFloat(void *, void **, int, float);
 int writeMultiVecDeviceFloat(void* deviceMultiVec, float* hostMultiVec);
--- a/cuda/vectordev.h
+++ b/cuda/vectordev.h
@ -34,6 +34,7 @@
 #include "cuda_runtime.h"
 //#include "common.h"
 //#include "cintrf.h"
 #include "cuda_util.h"
 #include <complex.h>
 struct MultiVectDevice
--- a/cuda/zvectordev.c
+++ b/cuda/zvectordev.c
@ -183,7 +183,7 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 }
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -192,7 +192,7 @@ int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
  return(i);
 }
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -202,7 +202,7 @@ int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMult
  return(i);
 }
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA)
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devMultiVecA)
 { int i=0;
  spgpuHandle_t handle=psb_cudaGetHandle();
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
@ -223,7 +223,8 @@ int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA)
  return(i);
 }
-int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devMultiVecA, void* devMultiVecB)
+int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n,
 				   void* devMultiVecA, void* devMultiVecB)
 {int i=0;
  struct MultiVectDevice *devVecA = (struct MultiVectDevice *) devMultiVecA;
  struct MultiVectDevice *devVecB = (struct MultiVectDevice *) devMultiVecB;
--- a/cuda/zvectordev.h
+++ b/cuda/zvectordev.h
@ -37,6 +37,7 @@
 #include "vectordev.h"
 #include "cuda_runtime.h"
 #include "core.h"
 #include "vector.h"
 int registerMappedDoubleComplex(void *, void **, int, cuDoubleComplex);
 int writeMultiVecDeviceDoubleComplex(void* deviceMultiVec, cuDoubleComplex* hostMultiVec);
@ -69,9 +70,9 @@ int iscatMultiVecDeviceDoubleComplex(void* deviceVec, int vectorId, int n,
 				     int indexBase, cuDoubleComplex beta);
 int scalMultiVecDeviceDoubleComplex(cuDoubleComplex alpha, void* devMultiVecA);
-int nrm2MultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
+int nrm2MultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
-int amaxMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
+int amaxMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
-int asumMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, void* devVecA);
+int asumMultiVecDeviceDoubleComplex(double* y_res, int n, void* devVecA);
 int dotMultiVecDeviceDoubleComplex(cuDoubleComplex* y_res, int n, 
 				   void* devVecA, void* devVecB);
--- a/docs/Makefile
+++ b/docs/Makefile
@ -1,7 +1,7 @@
 all: guide 
 guide:
-	cd src && $(MAKE)
+	cd src && $(MAKE)  clean all 
 doxy: 
-	doxygen doxypsb
+	doxygen doxypsb
--- a/docs/doxypsb
+++ b/docs/doxypsb
@ -52,7 +52,7 @@ PROJECT_LOGO           =
 # If a relative path is entered, it will be relative to the location
 # where doxygen was started. If left blank the current directory will be used.
-OUTPUT_DIRECTORY       = ../../psblas-3.4-doxygen
+OUTPUT_DIRECTORY       = ../../psblas-3.9-doxygen
 # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
 # 4096 sub-directories (in 2 levels) under the output directory of each output
--- a/docs/html/dia-.png
+++ b/docs/html/dia-.png
--- a/docs/html/ell-.png
+++ b/docs/html/ell-.png
--- a/docs/html/hdia-.png
+++ b/docs/html/hdia-.png
--- a/docs/html/hll-.png
+++ b/docs/html/hll-.png
--- a/docs/html/index.html
+++ b/docs/html/index.html
@ -10,16 +10,16 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-<!--l. 91--><p class="noindent" ><span 
+<!--l. 99--><p class="noindent" ><span 
-class="cmbx-12x-x-144">PSBLAS</span><br 
+class="pplb7t-x-x-172">PSBLAS</span><br 
 class="newline" /> <span 
-class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br 
+class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br 
 class="newline" /> <span 
-class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br 
+class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br 
 class="newline" /> <span 
-class="cmbx-10">Salvatore Filippone</span><br 
+class="pplb7t-">Salvatore Filippone</span><br 
 class="newline" /><span 
-class="cmbx-10">Alfredo Buttari  </span><br 
+class="pplb7t-">Alfredo Buttari  </span><br 
 class="newline" />Software version: 3.9.0<br 
 class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
 <br />   &#x00A0;<span class="sectionToc" >10 <a 
 href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
 <br />   &#x00A0;<span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
+href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
 <br />   &#x00A0;<span class="sectionToc" >12 <a 
-href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
+href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
 <br />   &#x00A0;<span class="sectionToc" >13 <a 
-href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
+href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
 <br />   &#x00A0;<span class="likesectionToc" ><a 
-href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
+href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
   </div>
--- a/docs/html/mat-.png
+++ b/docs/html/mat-.png
--- a/docs/html/userhtml.css
+++ b/docs/html/userhtml.css
@ -1,33 +1,62 @@
 /* start css.sty */
-.cmr-7{font-size:70%;}
+.pplb7t-x-x-172{font-size:172%;font-weight: bold;}
-.cmmi-5{font-size:50%;font-style: italic;}
+.pplb7t-x-x-172{font-weight: bold;}
-.cmmi-7{font-size:70%;font-style: italic;}
+.pplb7t-x-x-172{font-weight: bold;}
-.cmmi-10{font-style: italic;}
+.pplri7t-{font-style: italic;}
-.cmsy-7{font-size:70%;}
+.pplri7t-{font-style: italic;}
-.cmbx-12x-x-144{font-size:172%; font-weight: bold;}
+.pplri7t-x-x-120{font-size:120%;font-style: italic;}
-.cmbx-12x-x-144{ font-weight: bold;}
+.pplri7t-x-x-120{font-style: italic;}
-.cmbx-12x-x-144{ font-weight: bold;}
+.pplb7t-{font-weight: bold;}
-.cmti-10{ font-style: italic;}
+.pplb7t-{font-weight: bold;}
-.cmti-12{font-size:120%; font-style: italic;}
+.pplb7t-{font-weight: bold;}
 .cmbx-10{ font-weight: bold;}
 .cmbx-10{ font-weight: bold;}
 .cmbx-10{ font-weight: bold;}
 .cmtt-10{font-family: monospace,monospace;}
 .cmtt-10{font-family: monospace,monospace;}
 .cmtt-10{font-family: monospace,monospace;}
-.cmr-9{font-size:90%;}
+.pplr7t-x-x-76{font-size:76%;}
-.cmr-8{font-size:80%;}
+.zplmr7m-{font-style: italic;}
-.cmbx-12{font-size:120%; font-weight: bold;}
+.zplmr7m-{font-style: italic;}
-.cmbx-12{ font-weight: bold;}
+.zplmr7m-{font-style: italic;}
-.cmbx-12{ font-weight: bold;}
+.zplmr7m-{font-style: italic;}
 .zplmr7m-{font-style: italic;}
 .zplmr7m-x-x-76{font-size:76%;font-style: italic;}
 .zplmr7m-x-x-76{font-style: italic;}
 .zplmr7m-x-x-76{font-style: italic;}
 .zplmr7m-x-x-76{font-style: italic;}
 .zplmr7m-x-x-76{font-style: italic;}
 .zplmr7m-x-x-60{font-size:60%;font-style: italic;}
 .zplmr7m-x-x-60{font-style: italic;}
 .zplmr7m-x-x-60{font-style: italic;}
 .zplmr7m-x-x-60{font-style: italic;}
 .zplmr7m-x-x-60{font-style: italic;}
 .zplmr7y-x-x-76{font-size:76%;}
 .zplmr7t-x-x-76{font-size:76%;}
 .pplr7t-x-x-90{font-size:90%;}
 .pplr7t-x-x-80{font-size:80%;}
 .pplb7t-x-x-120{font-size:120%;font-weight: bold;}
 .pplb7t-x-x-120{font-weight: bold;}
 .pplb7t-x-x-120{font-weight: bold;}
 .cmtt-8{font-size:80%;font-family: monospace,monospace;}
 .cmtt-8{font-family: monospace,monospace;}
 .cmtt-8{font-family: monospace,monospace;}
 .cmtt-9{font-size:90%;font-family: monospace,monospace;}
 .cmtt-9{font-family: monospace,monospace;}
 .cmtt-9{font-family: monospace,monospace;}
-.cmmi-8{font-size:80%;font-style: italic;}
+.pplr7t-x-x-70{font-size:70%;}
 .zplmr7m-x-x-90{font-size:90%;font-style: italic;}
 .zplmr7m-x-x-90{font-style: italic;}
 .zplmr7m-x-x-90{font-style: italic;}
 .zplmr7m-x-x-90{font-style: italic;}
 .zplmr7m-x-x-90{font-style: italic;}
 .zplmr7y-x-x-90{font-size:90%;}
 .zplmr7m-x-x-80{font-size:80%;font-style: italic;}
 .zplmr7m-x-x-80{font-style: italic;}
 .zplmr7m-x-x-80{font-style: italic;}
 .zplmr7m-x-x-80{font-style: italic;}
 .zplmr7m-x-x-80{font-style: italic;}
 .zplmr7t-x-x-80{font-size:80%;}
 .pplrc7t-x-x-90{font-size:90%;}
 .small-caps{font-variant: small-caps; }
 p{margin-top:0;margin-bottom:0}
 p.indent{text-indent:0;}
 p + p{margin-top:1em;}
@ -158,5 +187,11 @@ pre.listings{font-family: monospace,monospace; white-space: pre-wrap; margin-top
 pre.lstlisting{font-family: monospace,monospace; white-space: pre-wrap; margin-top:0.5em; margin-bottom:0.5em; }
 pre.lstinputlisting{ font-family: monospace,monospace; white-space: pre-wrap; }
 .lstinputlisting .label{margin-right:0.5em;}
 #TBL-24-1{border-left: 1px solid black;}
 #TBL-24-1{border-right:1px solid black;}
 #TBL-24-2{border-right:1px solid black;}
 #TBL-24-3{border-right:1px solid black;}
 #TBL-24-4{border-right:1px solid black;}
 #TBL-24-5{border-right:1px solid black;}
 /* end css.sty */
--- a/docs/html/userhtml.html
+++ b/docs/html/userhtml.html
@ -10,16 +10,16 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-<!--l. 91--><p class="noindent" ><span 
+<!--l. 99--><p class="noindent" ><span 
-class="cmbx-12x-x-144">PSBLAS</span><br 
+class="pplb7t-x-x-172">PSBLAS</span><br 
 class="newline" /> <span 
-class="cmbx-12x-x-144">User&#8217;s and Reference Guide</span><br 
+class="pplb7t-x-x-172">User&#8217;s and Reference Guide</span><br 
 class="newline" /> <span 
-class="cmti-12">A reference guide for the Parallel Sparse BLAS library</span><br 
+class="pplri7t-x-x-120">A reference guide for the Parallel Sparse BLAS library</span><br 
 class="newline" /> <span 
-class="cmbx-10">Salvatore Filippone</span><br 
+class="pplb7t-">Salvatore Filippone</span><br 
 class="newline" /><span 
-class="cmbx-10">Alfredo Buttari  </span><br 
+class="pplb7t-">Alfredo Buttari  </span><br 
 class="newline" />Software version: 3.9.0<br 
 class="newline" />Aug 1st, 2024
@ -52,13 +52,13 @@ href="userhtmlse9.html#x14-1280009" id="QQ2-14-158">Utilities</a></span>
 <br />   &#x00A0;<span class="sectionToc" >10 <a 
 href="userhtmlse10.html#x15-13500010" id="QQ2-15-165">Preconditioner routines</a></span>
 <br />   &#x00A0;<span class="sectionToc" >11 <a 
-href="userhtmlse11.html#x17-14200011" id="QQ2-17-172">Iterative Methods</a></span>
+href="userhtmlse11.html#x17-14300011" id="QQ2-17-174">Iterative Methods</a></span>
 <br />   &#x00A0;<span class="sectionToc" >12 <a 
-href="userhtmlse12.html#x19-14400012" id="QQ2-19-174">Extensions</a></span>
+href="userhtmlse12.html#x19-14600012" id="QQ2-19-177">Extensions</a></span>
 <br />   &#x00A0;<span class="sectionToc" >13 <a 
-href="userhtmlse13.html#x20-15300013" id="QQ2-20-189">CUDA Environment Routines</a></span>
+href="userhtmlse13.html#x20-15500013" id="QQ2-20-192">CUDA Environment Routines</a></span>
 <br />   &#x00A0;<span class="likesectionToc" ><a 
-href="userhtmlli2.html#x21-168000" id="QQ2-21-218">References</a></span>
+href="userhtmlli2.html#x21-170000" id="QQ2-21-221">References</a></span>
   </div>
--- a/docs/html/userhtml0x.png
+++ b/docs/html/userhtml0x.png
--- a/docs/html/userhtml10x.png
+++ b/docs/html/userhtml10x.png
--- a/docs/html/userhtml11x.png
+++ b/docs/html/userhtml11x.png
--- a/docs/html/userhtml12x.png
+++ b/docs/html/userhtml12x.png
--- a/docs/html/userhtml13x.png
+++ b/docs/html/userhtml13x.png
--- a/docs/html/userhtml14x.png
+++ b/docs/html/userhtml14x.png
--- a/docs/html/userhtml15x.png
+++ b/docs/html/userhtml15x.png
--- a/docs/html/userhtml16.html
+++ b/docs/html/userhtml16.html
@ -10,10 +10,10 @@
 <link rel="stylesheet" type="text/css" href="userhtml.css"> 
 </head><body 
 >
-      <div class="footnote-text">
+         <div class="footnote-text">
-  <!--l. 72--><p class="indent" >     <span class="footnote-mark"><a 
+  <!--l. 72--><p class="indent" >       <span class="footnote-mark"><a 
 id="fn4x0"><a 
- id="x16-136002x10.1"></a>   <sup class="textsuperscript">4</sup></a></span><span 
+ id="x16-136002x10.1"></a>    <sup class="textsuperscript">4</sup></a></span><span 
-class="cmr-8">The string is case-insensitive</span></div>
+class="pplr7t-x-x-80">The string is case-insensitive</span></div>
-      
+       
 </body></html> 
--- a/docs/html/userhtml16x.png
+++ b/docs/html/userhtml16x.png
--- a/docs/html/userhtml17x.png
+++ b/docs/html/userhtml17x.png
--- a/docs/html/userhtml18.html
+++ b/docs/html/userhtml18.html
@ -13,8 +13,11 @@
  <div class="footnote-text">
  <!--l. 53--><p class="noindent" ><span class="footnote-mark"><a 
 id="fn5x0"><a 
- id="x18-143004x11.1"></a>   <sup class="textsuperscript">5</sup></a></span><span 
+ id="x18-144004x11.1"></a>    <sup class="textsuperscript">5</sup></a></span><span 
-class="cmr-8">Note: the implementation is for </span><span 
+class="pplr7t-x-x-80">Note: the implementation is for </span><span 
-class="cmmi-8">FCG</span><span 
+class="zplmr7m-x-x-80">FCG</span><span 
-class="cmr-8">(1).</span></div> 
+class="zplmr7t-x-x-80">(</span><span 
 class="pplr7t-x-x-80">1</span><span 
 class="zplmr7t-x-x-80">)</span><span 
 class="pplr7t-x-x-80">.</span></div> 
 </body></html> 
--- a/docs/html/userhtml18x.png
+++ b/docs/html/userhtml18x.png
--- a/docs/html/userhtml19x.png
+++ b/docs/html/userhtml19x.png
--- a/docs/html/userhtml1x.png
+++ b/docs/html/userhtml1x.png
--- a/docs/html/userhtml20x.png
+++ b/docs/html/userhtml20x.png
--- a/docs/html/userhtml21x.png
+++ b/docs/html/userhtml21x.png
--- a/docs/html/userhtml22x.png
+++ b/docs/html/userhtml22x.png
--- a/docs/html/userhtml23x.png
+++ b/docs/html/userhtml23x.png
--- a/docs/html/userhtml24x.png
+++ b/docs/html/userhtml24x.png
--- a/docs/html/userhtml25x.png
+++ b/docs/html/userhtml25x.png
--- a/docs/html/userhtml26x.png
+++ b/docs/html/userhtml26x.png
--- a/docs/html/userhtml27x.png
+++ b/docs/html/userhtml27x.png
--- a/docs/html/userhtml28x.png
+++ b/docs/html/userhtml28x.png
--- a/docs/html/userhtml29x.png
+++ b/docs/html/userhtml29x.png
--- a/docs/html/userhtml2x.png
+++ b/docs/html/userhtml2x.png
--- a/docs/html/userhtml30x.png
+++ b/docs/html/userhtml30x.png
--- a/docs/html/userhtml31x.png
+++ b/docs/html/userhtml31x.png
--- a/docs/html/userhtml32x.png
+++ b/docs/html/userhtml32x.png
--- a/docs/html/userhtml33x.png
+++ b/docs/html/userhtml33x.png
--- a/docs/html/userhtml34x.png
+++ b/docs/html/userhtml34x.png
--- a/docs/html/userhtml35x.png
+++ b/docs/html/userhtml35x.png
--- a/docs/html/userhtml36x.png
+++ b/docs/html/userhtml36x.png
--- a/docs/html/userhtml3x.png
+++ b/docs/html/userhtml3x.png
--- a/docs/html/userhtml4x.png
+++ b/docs/html/userhtml4x.png
--- a/Show More
+++ b/Show More